@@ -71,46 +71,56 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
7171 double * cost ,
7272 uint8_t * switch_signal ,
7373 uint8_t * block_id ) {
74- const size_t data_size = FN (HistogramDataSize )();
75- const size_t bitmaplen = (num_histograms + 7 ) >> 3 ;
74+ const size_t alphabet_size = FN (HistogramDataSize )();
75+ const size_t bitmap_len = (num_histograms + 7 ) >> 3 ;
7676 size_t num_blocks = 1 ;
77+ size_t byte_ix ;
7778 size_t i ;
7879 size_t j ;
7980 BROTLI_DCHECK (num_histograms <= 256 );
81+
82+ /* Trivial case: single histogram -> single block type. */
8083 if (num_histograms <= 1 ) {
8184 for (i = 0 ; i < length ; ++ i ) {
8285 block_id [i ] = 0 ;
8386 }
8487 return 1 ;
8588 }
86- memset (insert_cost , 0 , sizeof (insert_cost [0 ]) * data_size * num_histograms );
89+
90+ /* Fill bitcost for each symbol of all histograms.
91+ * Non-existing symbol cost: 2 + log2(total_count).
92+ * Regular symbol cost: -log2(symbol_count / total_count). */
93+ memset (insert_cost , 0 ,
94+ sizeof (insert_cost [0 ]) * alphabet_size * num_histograms );
8795 for (i = 0 ; i < num_histograms ; ++ i ) {
8896 insert_cost [i ] = FastLog2 ((uint32_t )histograms [i ].total_count_ );
8997 }
90- for (i = data_size ; i != 0 ;) {
98+ for (i = alphabet_size ; i != 0 ;) {
99+ /* Reverse order to use the 0-th row as a temporary storage. */
91100 -- i ;
92101 for (j = 0 ; j < num_histograms ; ++ j ) {
93102 insert_cost [i * num_histograms + j ] =
94103 insert_cost [j ] - BitCost (histograms [j ].data_ [i ]);
95104 }
96105 }
97- memset (cost , 0 , sizeof (cost [0 ]) * num_histograms );
98- memset (switch_signal , 0 , sizeof (switch_signal [0 ]) * length * bitmaplen );
106+
99107 /* After each iteration of this loop, cost[k] will contain the difference
100108 between the minimum cost of arriving at the current byte position using
101109 entropy code k, and the minimum cost of arriving at the current byte
102110 position. This difference is capped at the block switch cost, and if it
103111 reaches block switch cost, it means that when we trace back from the last
104112 position, we need to switch here. */
105- for (i = 0 ; i < length ; ++ i ) {
106- const size_t byte_ix = i ;
107- size_t ix = byte_ix * bitmaplen ;
108- size_t insert_cost_ix = data [byte_ix ] * num_histograms ;
113+ memset (cost , 0 , sizeof (cost [0 ]) * num_histograms );
114+ memset (switch_signal , 0 , sizeof (switch_signal [0 ]) * length * bitmap_len );
115+ for (byte_ix = 0 ; byte_ix < length ; ++ byte_ix ) {
116+ size_t ix = byte_ix * bitmap_len ;
117+ size_t symbol = data [byte_ix ];
118+ size_t insert_cost_ix = symbol * num_histograms ;
109119 double min_cost = 1e99 ;
110120 double block_switch_cost = block_switch_bitcost ;
111121 size_t k ;
112122 for (k = 0 ; k < num_histograms ; ++ k ) {
113- /* We are coding the symbol in data[byte_ix] with entropy code k. */
123+ /* We are coding the symbol with entropy code k. */
114124 cost [k ] += insert_cost [insert_cost_ix + k ];
115125 if (cost [k ] < min_cost ) {
116126 min_cost = cost [k ];
@@ -126,20 +136,20 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
126136 if (cost [k ] >= block_switch_cost ) {
127137 const uint8_t mask = (uint8_t )(1u << (k & 7 ));
128138 cost [k ] = block_switch_cost ;
129- BROTLI_DCHECK ((k >> 3 ) < bitmaplen );
139+ BROTLI_DCHECK ((k >> 3 ) < bitmap_len );
130140 switch_signal [ix + (k >> 3 )] |= mask ;
131141 }
132142 }
133143 }
134144 { /* Trace back from the last position and switch at the marked places. */
135- size_t byte_ix = length - 1 ;
136- size_t ix = byte_ix * bitmaplen ;
145+ byte_ix = length - 1 ;
146+ size_t ix = byte_ix * bitmap_len ;
137147 uint8_t cur_id = block_id [byte_ix ];
138148 while (byte_ix > 0 ) {
139149 const uint8_t mask = (uint8_t )(1u << (cur_id & 7 ));
140- BROTLI_DCHECK (((size_t )cur_id >> 3 ) < bitmaplen );
150+ BROTLI_DCHECK (((size_t )cur_id >> 3 ) < bitmap_len );
141151 -- byte_ix ;
142- ix -= bitmaplen ;
152+ ix -= bitmap_len ;
143153 if (switch_signal [ix + (cur_id >> 3 )] & mask ) {
144154 if (cur_id != block_id [byte_ix ]) {
145155 cur_id = block_id [byte_ix ];
@@ -185,6 +195,8 @@ static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
185195 }
186196}
187197
198+ /* Given the initial partitioning, build a partitioning with a limited number
199+ * of histograms (and block types). */
188200static void FN (ClusterBlocks )(MemoryManager * m ,
189201 const DataType * data , const size_t length ,
190202 const size_t num_blocks ,
@@ -228,6 +240,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
228240
229241 memset (block_lengths , 0 , num_blocks * sizeof (uint32_t ));
230242
243+ /* Calculate block lengths (convert repeating values -> series length). */
231244 {
232245 size_t block_idx = 0 ;
233246 for (i = 0 ; i < length ; ++ i ) {
@@ -240,15 +253,17 @@ static void FN(ClusterBlocks)(MemoryManager* m,
240253 BROTLI_DCHECK (block_idx == num_blocks );
241254 }
242255
256+ /* Pre-cluster blocks (cluster batches). */
243257 for (i = 0 ; i < num_blocks ; i += HISTOGRAMS_PER_BATCH ) {
244258 const size_t num_to_combine =
245259 BROTLI_MIN (size_t , num_blocks - i , HISTOGRAMS_PER_BATCH );
246260 size_t num_new_clusters ;
247261 size_t j ;
248262 for (j = 0 ; j < num_to_combine ; ++ j ) {
249263 size_t k ;
264+ size_t block_length = block_lengths [i + j ];
250265 FN (HistogramClear )(& histograms [j ]);
251- for (k = 0 ; k < block_lengths [ i + j ] ; ++ k ) {
266+ for (k = 0 ; k < block_length ; ++ k ) {
252267 FN (HistogramAdd )(& histograms [j ], data [pos ++ ]);
253268 }
254269 histograms [j ].bit_cost_ = FN (BrotliPopulationCost )(& histograms [j ]);
@@ -278,14 +293,14 @@ static void FN(ClusterBlocks)(MemoryManager* m,
278293 }
279294 BROTLI_FREE (m , histograms );
280295
296+ /* Final clustering. */
281297 max_num_pairs =
282298 BROTLI_MIN (size_t , 64 * num_clusters , (num_clusters / 2 ) * num_clusters );
283299 if (pairs_capacity < max_num_pairs + 1 ) {
284300 BROTLI_FREE (m , pairs );
285301 pairs = BROTLI_ALLOC (m , HistogramPair , max_num_pairs + 1 );
286302 if (BROTLI_IS_OOM (m ) || BROTLI_IS_NULL (pairs )) return ;
287303 }
288-
289304 clusters = BROTLI_ALLOC (m , uint32_t , num_clusters );
290305 if (BROTLI_IS_OOM (m ) || BROTLI_IS_NULL (clusters )) return ;
291306 for (i = 0 ; i < num_clusters ; ++ i ) {
@@ -298,6 +313,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
298313 BROTLI_FREE (m , pairs );
299314 BROTLI_FREE (m , cluster_size );
300315
316+ /* Assign blocks to final histograms. */
301317 new_index = BROTLI_ALLOC (m , uint32_t , num_clusters );
302318 if (BROTLI_IS_OOM (m ) || BROTLI_IS_NULL (new_index )) return ;
303319 for (i = 0 ; i < num_clusters ; ++ i ) new_index [i ] = kInvalidIndex ;
@@ -313,6 +329,8 @@ static void FN(ClusterBlocks)(MemoryManager* m,
313329 for (j = 0 ; j < block_lengths [i ]; ++ j ) {
314330 FN (HistogramAdd )(& histo , data [pos ++ ]);
315331 }
332+ /* Among equally good histograms prefer last used. */
333+ /* TODO: should we give a block-switch discount here? */
316334 best_out = (i == 0 ) ? histogram_symbols [0 ] : histogram_symbols [i - 1 ];
317335 best_bits =
318336 FN (BrotliHistogramBitCostDistance )(& histo , & all_histograms [best_out ]);
@@ -337,6 +355,9 @@ static void FN(ClusterBlocks)(MemoryManager* m,
337355 BROTLI_ENSURE_CAPACITY (
338356 m , uint32_t , split -> lengths , split -> lengths_alloc_size , num_blocks );
339357 if (BROTLI_IS_OOM (m )) return ;
358+
359+ /* Rewrite final assignment to block-split. There might be fewer blocks
360+ * than |num_blocks| due to clustering. */
340361 {
341362 uint32_t cur_length = 0 ;
342363 size_t block_idx = 0 ;
@@ -361,24 +382,36 @@ static void FN(ClusterBlocks)(MemoryManager* m,
361382 BROTLI_FREE (m , histogram_symbols );
362383}
363384
385+ /* Create BlockSplit (partitioning) given the limits, estimates and "effort"
386+ * parameters.
387+ *
388+ * NB: max_histograms is often less than number of histograms allowed by format;
389+ * this is done intentionally, to save some "space" for context-aware
390+ * clustering (here entropy is estimated for context-free symbols). */
364391static void FN (SplitByteVector )(MemoryManager * m ,
365392 const DataType * data , const size_t length ,
366- const size_t literals_per_histogram ,
393+ const size_t symbols_per_histogram ,
367394 const size_t max_histograms ,
368395 const size_t sampling_stride_length ,
369396 const double block_switch_cost ,
370397 const BrotliEncoderParams * params ,
371398 BlockSplit * split ) {
372399 const size_t data_size = FN (HistogramDataSize )();
373- size_t num_histograms = length / literals_per_histogram + 1 ;
374400 HistogramType * histograms ;
401+ /* Calculate number of histograms; initial estimate is one histogram per
402+ * specified amount of symbols; however, this value is capped. */
403+ size_t num_histograms = length / symbols_per_histogram + 1 ;
375404 if (num_histograms > max_histograms ) {
376405 num_histograms = max_histograms ;
377406 }
407+
408+ /* Corner case: no input. */
378409 if (length == 0 ) {
379410 split -> num_types = 1 ;
380411 return ;
381- } else if (length < kMinLengthForBlockSplitting ) {
412+ }
413+
414+ if (length < kMinLengthForBlockSplitting ) {
382415 BROTLI_ENSURE_CAPACITY (m , uint8_t ,
383416 split -> types , split -> types_alloc_size , split -> num_blocks + 1 );
384417 BROTLI_ENSURE_CAPACITY (m , uint32_t ,
0 commit comments