Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
265 changes: 136 additions & 129 deletions src/huffman.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,18 @@
//! Rudimentary utility for reading Canonical Huffman Codes.
//! Based off <https://github.com/webmproject/libwebp/blob/7f8472a610b61ec780ef0a8873cd954ac512a505/src/utils/huffman.c>

use std::io::BufRead;

use crate::decoder::DecodingError;

use super::lossless::BitReader;
use crate::lossless::BitReader;

const MAX_ALLOWED_CODE_LENGTH: usize = 15;
const MAX_TABLE_BITS: u8 = 10;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum HuffmanTreeNode {
Branch(usize), //offset in vector to children
Leaf(u16), //symbol stored in leaf
Empty,
}

#[derive(Clone, Debug)]
enum HuffmanTreeInner {
Single(u16),
Tree {
tree: Vec<HuffmanTreeNode>,
table: Vec<u32>,
table_mask: u16,
primary_table: Vec<u16>,
secondary_table: Vec<u16>,
},
}

Expand All @@ -38,14 +27,31 @@ impl Default for HuffmanTree {
}

impl HuffmanTree {
/// Advance `codeword` to its successor within a table of `table_size` entries.
///
/// The highest bit that is still zero below `table_size` is set and every bit above it is
/// cleared. When all of those bits are already set (`codeword == table_size - 1`, the final
/// code), the codeword is returned unchanged.
fn next_codeword(mut codeword: u16, table_size: u16) -> u16 {
    let all_ones = table_size - 1;

    if codeword != all_ones {
        // Bits that are set in the mask but still clear in the codeword.
        let unset_bits = codeword ^ all_ones;
        // Isolate the most significant of those bits.
        let top_bit: u16 = 1 << ((u16::BITS - 1) - unset_bits.leading_zeros());
        // Keep everything below that bit, drop everything above it, then set it.
        codeword = (codeword & (top_bit - 1)) | top_bit;
    }

    codeword
}

/// Builds a tree implicitly, just from code lengths
#[allow(clippy::needless_range_loop)]
pub(crate) fn build_implicit(code_lengths: Vec<u16>) -> Result<Self, DecodingError> {
// Count symbols and build histogram
let mut num_symbols = 0;
let mut code_length_hist = [0; MAX_ALLOWED_CODE_LENGTH + 1];
for &length in code_lengths.iter().filter(|&&x| x != 0) {
code_length_hist[usize::from(length)] += 1;
num_symbols += 1;
let mut histogram = [0; MAX_ALLOWED_CODE_LENGTH + 1];
for &length in code_lengths.iter() {
histogram[usize::from(length)] += 1;
if length != 0 {
num_symbols += 1;
}
}

// Handle special cases
Expand All @@ -56,96 +62,113 @@ impl HuffmanTree {
return Ok(Self::build_single_node(root_symbol));
};

// Assign codes
let mut curr_code = 0;
let mut next_codes = [0; MAX_ALLOWED_CODE_LENGTH + 1];
let max_code_length = code_length_hist.iter().rposition(|&x| x != 0).unwrap() as u16;
for code_len in 1..usize::from(max_code_length) + 1 {
next_codes[code_len] = curr_code;
curr_code = (curr_code + code_length_hist[code_len]) << 1;
// Determine the maximum code length.
let mut max_length = MAX_ALLOWED_CODE_LENGTH;
while max_length > 1 && histogram[max_length] == 0 {
max_length -= 1;
}

// Sort symbols by code length. Given the histogram, we can determine the starting offset
// for each code length.
let mut offsets = [0; 16];
let mut codespace_used = 0;
offsets[1] = histogram[0];
for i in 1..max_length {
offsets[i + 1] = offsets[i] + histogram[i];
codespace_used = (codespace_used << 1) + histogram[i];
}
codespace_used = (codespace_used << 1) + histogram[max_length];

// Confirm that the huffman tree is valid
if curr_code != 2 << max_code_length {
if codespace_used != (1 << max_length) {
return Err(DecodingError::HuffmanError);
}

// Calculate table/tree parameters
let table_bits = max_code_length.min(u16::from(MAX_TABLE_BITS));
let table_bits = (max_length as u16).min(u16::from(MAX_TABLE_BITS));
let table_size = (1 << table_bits) as usize;
let table_mask = table_size as u16 - 1;
let tree_size = code_length_hist[table_bits as usize + 1..=max_code_length as usize]
.iter()
.sum::<u16>() as usize;

// Populate decoding table
let mut tree = Vec::with_capacity(2 * tree_size);
let mut table = vec![0; table_size];
for (symbol, &length) in code_lengths.iter().enumerate() {
if length == 0 {
continue;
let mut primary_table = vec![0; table_size];

// Sort the symbols by code length.
let mut next_index = offsets;
let mut sorted_symbols = vec![0u16; code_lengths.len()];
for symbol in 0..code_lengths.len() {
let length = code_lengths[symbol];
sorted_symbols[next_index[length as usize]] = symbol as u16;
next_index[length as usize] += 1;
}

let mut codeword = 0u16;
let mut i = histogram[0];

// Populate the primary decoding table
let primary_table_bits = primary_table.len().ilog2() as usize;
let primary_table_mask = (1 << primary_table_bits) - 1;
for length in 1..=primary_table_bits {
let current_table_end = 1 << length;

// Loop over all symbols with the current code length and set their table entries.
for _ in 0..histogram[length] {
let symbol = sorted_symbols[i];
i += 1;

let entry = ((length as u16) << 12) | symbol;
primary_table[codeword as usize] = entry;

codeword = Self::next_codeword(codeword, current_table_end as u16);
}

// If we aren't at the maximum table size, double the size of the table.
if length < primary_table_bits {
primary_table.copy_within(0..current_table_end, current_table_end);
}
}

// Populate the secondary decoding table.
let mut secondary_table = Vec::new();
if max_length > primary_table_bits {
let mut subtable_start = 0;
let mut subtable_prefix = !0;
for length in (primary_table_bits + 1)..=max_length {
let subtable_size = 1 << (length - primary_table_bits);
for _ in 0..histogram[length] {
// If the codeword's prefix doesn't match the current subtable, create a new
// subtable.
if codeword & primary_table_mask != subtable_prefix {
subtable_prefix = codeword & primary_table_mask;
subtable_start = secondary_table.len();
primary_table[subtable_prefix as usize] =
((length as u16) << 12) | subtable_start as u16;
secondary_table.resize(subtable_start + subtable_size, 0);
}

let code = next_codes[length as usize];
next_codes[length as usize] += 1;
// Lookup the symbol.
let symbol = sorted_symbols[i];
i += 1;

if length <= table_bits {
let mut j = (u16::reverse_bits(code) >> (16 - length)) as usize;
let entry = (u32::from(length) << 16) | symbol as u32;
while j < table_size {
table[j] = entry;
j += 1 << length as usize;
}
} else {
let table_index =
((u16::reverse_bits(code) >> (16 - length)) & table_mask) as usize;
let table_value = table[table_index];

debug_assert_eq!(table_value >> 16, 0);

let mut node_index = if table_value == 0 {
let node_index = tree.len();
table[table_index] = (node_index + 1) as u32;
tree.push(HuffmanTreeNode::Empty);
node_index
} else {
(table_value - 1) as usize
};

let code = usize::from(code);
for depth in (0..length - table_bits).rev() {
let node = tree[node_index];

let offset = match node {
HuffmanTreeNode::Empty => {
// Turns a node from empty into a branch and assigns its children
let offset = tree.len() - node_index;
tree[node_index] = HuffmanTreeNode::Branch(offset);
tree.push(HuffmanTreeNode::Empty);
tree.push(HuffmanTreeNode::Empty);
offset
}
HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError),
HuffmanTreeNode::Branch(offset) => offset,
};

node_index += offset + ((code >> depth) & 1);
// Insert the symbol into the secondary table and advance to the next codeword.
secondary_table[subtable_start + (codeword >> primary_table_bits) as usize] =
(symbol << 4) | (length as u16);
codeword = Self::next_codeword(codeword, 1 << length);
}

match tree[node_index] {
HuffmanTreeNode::Empty => {
tree[node_index] = HuffmanTreeNode::Leaf(symbol as u16);
}
HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError),
HuffmanTreeNode::Branch(_offset) => return Err(DecodingError::HuffmanError),
// If there are more codes with the same subtable prefix, extend the subtable.
if length < max_length && codeword & primary_table_mask == subtable_prefix {
secondary_table.extend_from_within(subtable_start..);
primary_table[subtable_prefix as usize] =
(((length + 1) as u16) << 12) | subtable_start as u16;
}
}
}

// Ensure indexes into the secondary table fit in 12 bits.
assert!(secondary_table.len() <= 4096);

Ok(Self(HuffmanTreeInner::Tree {
tree,
table,
table_mask,
primary_table,
secondary_table,
}))
}

Expand All @@ -155,13 +178,9 @@ impl HuffmanTree {

pub(crate) fn build_two_node(zero: u16, one: u16) -> Self {
Self(HuffmanTreeInner::Tree {
tree: vec![
HuffmanTreeNode::Leaf(zero),
HuffmanTreeNode::Leaf(one),
HuffmanTreeNode::Empty,
],
table: vec![(1 << 16) | u32::from(zero), (1 << 16) | u32::from(one)],
primary_table: vec![(1 << 12) | zero, (1 << 12) | one],
table_mask: 0x1,
secondary_table: Vec::new(),
})
}

Expand All @@ -171,27 +190,18 @@ impl HuffmanTree {

#[inline(never)]
fn read_symbol_slowpath<R: BufRead>(
tree: &[HuffmanTreeNode],
mut v: usize,
start_index: usize,
secondary_table: &[u16],
v: u16,
primary_table_entry: u16,
bit_reader: &mut BitReader<R>,
) -> Result<u16, DecodingError> {
let mut depth = MAX_TABLE_BITS;
let mut index = start_index;
loop {
match &tree[index] {
HuffmanTreeNode::Branch(children_offset) => {
index += children_offset + (v & 1);
depth += 1;
v >>= 1;
}
HuffmanTreeNode::Leaf(symbol) => {
bit_reader.consume(depth)?;
return Ok(*symbol);
}
HuffmanTreeNode::Empty => return Err(DecodingError::HuffmanError),
}
}
let length = primary_table_entry >> 12;
let mask = (1 << (length - MAX_TABLE_BITS as u16)) - 1;
let secondary_index = ((primary_table_entry & 0xfff) as usize)
+ ((v >> MAX_TABLE_BITS) as usize & mask as usize);
let secondary_entry = secondary_table[secondary_index];
bit_reader.consume((secondary_entry & 0xf) as u8)?;
Ok(secondary_entry >> 4)
}

/// Reads a symbol using the bit reader.
Expand All @@ -204,23 +214,18 @@ impl HuffmanTree {
) -> Result<u16, DecodingError> {
match &self.0 {
HuffmanTreeInner::Tree {
tree,
table,
primary_table,
secondary_table,
table_mask,
} => {
let v = bit_reader.peek_full() as u16;
let entry = table[(v & table_mask) as usize];
if entry >> 16 != 0 {
bit_reader.consume((entry >> 16) as u8)?;
return Ok(entry as u16);
let entry = primary_table[(v & table_mask) as usize];
if (entry >> 12) <= MAX_TABLE_BITS as u16 {
bit_reader.consume((entry >> 12) as u8)?;
return Ok(entry & 0xfff);
}

Self::read_symbol_slowpath(
tree,
(v >> MAX_TABLE_BITS) as usize,
((entry & 0xffff) - 1) as usize,
bit_reader,
)
Self::read_symbol_slowpath(secondary_table, v, entry, bit_reader)
}
HuffmanTreeInner::Single(symbol) => Ok(*symbol),
}
Expand All @@ -233,12 +238,14 @@ impl HuffmanTree {
pub(crate) fn peek_symbol<R: BufRead>(&self, bit_reader: &BitReader<R>) -> Option<(u8, u16)> {
match &self.0 {
HuffmanTreeInner::Tree {
table, table_mask, ..
primary_table,
table_mask,
..
} => {
let v = bit_reader.peek_full() as u16;
let entry = table[(v & table_mask) as usize];
if entry >> 16 != 0 {
return Some(((entry >> 16) as u8, entry as u16));
let entry = primary_table[(v & table_mask) as usize];
if (entry >> 12) <= MAX_TABLE_BITS as u16 {
return Some(((entry >> 12) as u8, entry & 0xfff));
}
None
}
Expand Down
Loading