@@ -71,46 +71,56 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
7171 double * cost ,
7272 uint8_t * switch_signal ,
7373 uint8_t * block_id ) {
74- const size_t data_size = FN (HistogramDataSize )();
75- const size_t bitmaplen = (num_histograms + 7 ) >> 3 ;
74+ const size_t alphabet_size = FN (HistogramDataSize )();
75+ const size_t bitmap_len = (num_histograms + 7 ) >> 3 ;
7676 size_t num_blocks = 1 ;
77+ size_t byte_ix ;
7778 size_t i ;
7879 size_t j ;
7980 BROTLI_DCHECK (num_histograms <= 256 );
81+
82+ /* Trivial case: single histogram -> single block type. */
8083 if (num_histograms <= 1 ) {
8184 for (i = 0 ; i < length ; ++ i ) {
8285 block_id [i ] = 0 ;
8386 }
8487 return 1 ;
8588 }
86- memset (insert_cost , 0 , sizeof (insert_cost [0 ]) * data_size * num_histograms );
89+
90+ /* Fill bitcost for each symbol of all histograms.
91+ * Non-existing symbol cost: 2 + log2(total_count).
92+ * Regular symbol cost: -log2(symbol_count / total_count). */
93+ memset (insert_cost , 0 ,
94+ sizeof (insert_cost [0 ]) * alphabet_size * num_histograms );
8795 for (i = 0 ; i < num_histograms ; ++ i ) {
8896 insert_cost [i ] = FastLog2 ((uint32_t )histograms [i ].total_count_ );
8997 }
90- for (i = data_size ; i != 0 ;) {
98+ for (i = alphabet_size ; i != 0 ;) {
99+ /* Reverse order to use the 0-th row as temporary storage. */
91100 -- i ;
92101 for (j = 0 ; j < num_histograms ; ++ j ) {
93102 insert_cost [i * num_histograms + j ] =
94103 insert_cost [j ] - BitCost (histograms [j ].data_ [i ]);
95104 }
96105 }
97- memset (cost , 0 , sizeof (cost [0 ]) * num_histograms );
98- memset (switch_signal , 0 , sizeof (switch_signal [0 ]) * length * bitmaplen );
106+
99107 /* After each iteration of this loop, cost[k] will contain the difference
100108 between the minimum cost of arriving at the current byte position using
101109 entropy code k, and the minimum cost of arriving at the current byte
102110 position. This difference is capped at the block switch cost, and if it
103111 reaches block switch cost, it means that when we trace back from the last
104112 position, we need to switch here. */
105- for (i = 0 ; i < length ; ++ i ) {
106- const size_t byte_ix = i ;
107- size_t ix = byte_ix * bitmaplen ;
108- size_t insert_cost_ix = data [byte_ix ] * num_histograms ;
113+ memset (cost , 0 , sizeof (cost [0 ]) * num_histograms );
114+ memset (switch_signal , 0 , sizeof (switch_signal [0 ]) * length * bitmap_len );
115+ for (byte_ix = 0 ; byte_ix < length ; ++ byte_ix ) {
116+ size_t ix = byte_ix * bitmap_len ;
117+ size_t symbol = data [byte_ix ];
118+ size_t insert_cost_ix = symbol * num_histograms ;
109119 double min_cost = 1e99 ;
110120 double block_switch_cost = block_switch_bitcost ;
111121 size_t k ;
112122 for (k = 0 ; k < num_histograms ; ++ k ) {
113- /* We are coding the symbol in data[byte_ix] with entropy code k. */
123+ /* We are coding the symbol with entropy code k. */
114124 cost [k ] += insert_cost [insert_cost_ix + k ];
115125 if (cost [k ] < min_cost ) {
116126 min_cost = cost [k ];
@@ -126,20 +136,21 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
126136 if (cost [k ] >= block_switch_cost ) {
127137 const uint8_t mask = (uint8_t )(1u << (k & 7 ));
128138 cost [k ] = block_switch_cost ;
129- BROTLI_DCHECK ((k >> 3 ) < bitmaplen );
139+ BROTLI_DCHECK ((k >> 3 ) < bitmap_len );
130140 switch_signal [ix + (k >> 3 )] |= mask ;
131141 }
132142 }
133143 }
144+
145+ byte_ix = length - 1 ;
134146 { /* Trace back from the last position and switch at the marked places. */
135- size_t byte_ix = length - 1 ;
136- size_t ix = byte_ix * bitmaplen ;
147+ size_t ix = byte_ix * bitmap_len ;
137148 uint8_t cur_id = block_id [byte_ix ];
138149 while (byte_ix > 0 ) {
139150 const uint8_t mask = (uint8_t )(1u << (cur_id & 7 ));
140- BROTLI_DCHECK (((size_t )cur_id >> 3 ) < bitmaplen );
151+ BROTLI_DCHECK (((size_t )cur_id >> 3 ) < bitmap_len );
141152 -- byte_ix ;
142- ix -= bitmaplen ;
153+ ix -= bitmap_len ;
143154 if (switch_signal [ix + (cur_id >> 3 )] & mask ) {
144155 if (cur_id != block_id [byte_ix ]) {
145156 cur_id = block_id [byte_ix ];
@@ -185,6 +196,8 @@ static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
185196 }
186197}
187198
199+ /* Given the initial partitioning, build a partitioning with a limited number
200+ * of histograms (and block types). */
188201static void FN (ClusterBlocks )(MemoryManager * m ,
189202 const DataType * data , const size_t length ,
190203 const size_t num_blocks ,
@@ -228,6 +241,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
228241
229242 memset (block_lengths , 0 , num_blocks * sizeof (uint32_t ));
230243
244+ /* Calculate block lengths (convert repeating values -> series length). */
231245 {
232246 size_t block_idx = 0 ;
233247 for (i = 0 ; i < length ; ++ i ) {
@@ -240,15 +254,17 @@ static void FN(ClusterBlocks)(MemoryManager* m,
240254 BROTLI_DCHECK (block_idx == num_blocks );
241255 }
242256
257+ /* Pre-cluster blocks (cluster batches). */
243258 for (i = 0 ; i < num_blocks ; i += HISTOGRAMS_PER_BATCH ) {
244259 const size_t num_to_combine =
245260 BROTLI_MIN (size_t , num_blocks - i , HISTOGRAMS_PER_BATCH );
246261 size_t num_new_clusters ;
247262 size_t j ;
248263 for (j = 0 ; j < num_to_combine ; ++ j ) {
249264 size_t k ;
265+ size_t block_length = block_lengths [i + j ];
250266 FN (HistogramClear )(& histograms [j ]);
251- for (k = 0 ; k < block_lengths [ i + j ] ; ++ k ) {
267+ for (k = 0 ; k < block_length ; ++ k ) {
252268 FN (HistogramAdd )(& histograms [j ], data [pos ++ ]);
253269 }
254270 histograms [j ].bit_cost_ = FN (BrotliPopulationCost )(& histograms [j ]);
@@ -278,14 +294,14 @@ static void FN(ClusterBlocks)(MemoryManager* m,
278294 }
279295 BROTLI_FREE (m , histograms );
280296
297+ /* Final clustering. */
281298 max_num_pairs =
282299 BROTLI_MIN (size_t , 64 * num_clusters , (num_clusters / 2 ) * num_clusters );
283300 if (pairs_capacity < max_num_pairs + 1 ) {
284301 BROTLI_FREE (m , pairs );
285302 pairs = BROTLI_ALLOC (m , HistogramPair , max_num_pairs + 1 );
286303 if (BROTLI_IS_OOM (m ) || BROTLI_IS_NULL (pairs )) return ;
287304 }
288-
289305 clusters = BROTLI_ALLOC (m , uint32_t , num_clusters );
290306 if (BROTLI_IS_OOM (m ) || BROTLI_IS_NULL (clusters )) return ;
291307 for (i = 0 ; i < num_clusters ; ++ i ) {
@@ -298,6 +314,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
298314 BROTLI_FREE (m , pairs );
299315 BROTLI_FREE (m , cluster_size );
300316
317+ /* Assign blocks to final histograms. */
301318 new_index = BROTLI_ALLOC (m , uint32_t , num_clusters );
302319 if (BROTLI_IS_OOM (m ) || BROTLI_IS_NULL (new_index )) return ;
303320 for (i = 0 ; i < num_clusters ; ++ i ) new_index [i ] = kInvalidIndex ;
@@ -313,6 +330,8 @@ static void FN(ClusterBlocks)(MemoryManager* m,
313330 for (j = 0 ; j < block_lengths [i ]; ++ j ) {
314331 FN (HistogramAdd )(& histo , data [pos ++ ]);
315332 }
333+ /* Among equally good histograms prefer last used. */
334+ /* TODO: should we give a block-switch discount here? */
316335 best_out = (i == 0 ) ? histogram_symbols [0 ] : histogram_symbols [i - 1 ];
317336 best_bits =
318337 FN (BrotliHistogramBitCostDistance )(& histo , & all_histograms [best_out ]);
@@ -337,6 +356,9 @@ static void FN(ClusterBlocks)(MemoryManager* m,
337356 BROTLI_ENSURE_CAPACITY (
338357 m , uint32_t , split -> lengths , split -> lengths_alloc_size , num_blocks );
339358 if (BROTLI_IS_OOM (m )) return ;
359+
360+ /* Rewrite final assignment to block-split. There might be fewer blocks
361+ * than |num_blocks| due to clustering. */
340362 {
341363 uint32_t cur_length = 0 ;
342364 size_t block_idx = 0 ;
@@ -361,24 +383,36 @@ static void FN(ClusterBlocks)(MemoryManager* m,
361383 BROTLI_FREE (m , histogram_symbols );
362384}
363385
386+ /* Create BlockSplit (partitioning) given the limits, estimates and "effort"
387+ * parameters.
388+ *
389+ * NB: max_histograms is often less than number of histograms allowed by format;
390+ * this is done intentionally, to save some "space" for context-aware
391+ * clustering (here entropy is estimated for context-free symbols). */
364392static void FN (SplitByteVector )(MemoryManager * m ,
365393 const DataType * data , const size_t length ,
366- const size_t literals_per_histogram ,
394+ const size_t symbols_per_histogram ,
367395 const size_t max_histograms ,
368396 const size_t sampling_stride_length ,
369397 const double block_switch_cost ,
370398 const BrotliEncoderParams * params ,
371399 BlockSplit * split ) {
372400 const size_t data_size = FN (HistogramDataSize )();
373- size_t num_histograms = length / literals_per_histogram + 1 ;
374401 HistogramType * histograms ;
402+ /* Calculate number of histograms; initial estimate is one histogram per
403+ * specified amount of symbols; however, this value is capped. */
404+ size_t num_histograms = length / symbols_per_histogram + 1 ;
375405 if (num_histograms > max_histograms ) {
376406 num_histograms = max_histograms ;
377407 }
408+
409+ /* Corner case: no input. */
378410 if (length == 0 ) {
379411 split -> num_types = 1 ;
380412 return ;
381- } else if (length < kMinLengthForBlockSplitting ) {
413+ }
414+
415+ if (length < kMinLengthForBlockSplitting ) {
382416 BROTLI_ENSURE_CAPACITY (m , uint8_t ,
383417 split -> types , split -> types_alloc_size , split -> num_blocks + 1 );
384418 BROTLI_ENSURE_CAPACITY (m , uint32_t ,
0 commit comments