Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions lib/common/fse.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
const short* normalizedCounter,
unsigned maxSymbolValue, unsigned tableLog);

/*! FSE_estimateNCountSize():
Estimate space requirement to save 'normalizedCounter'.
@return : size of the compressed table,
or an errorCode, which can be tested using FSE_isError(). */
FSE_PUBLIC_API size_t FSE_estimateNCountSize(const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);

/*! Constructor and Destructor of FSE_CTable.
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
Expand All @@ -179,6 +185,11 @@ FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCou
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);

/*! FSE_estimateCompressedSize():
Estimate compressed size of data based on sum of fractional bit counts, will be >= actual cost.
@return : estimated size of compressed data */
FSE_PUBLIC_API size_t FSE_estimateCompressedSize (const FSE_CTable* ct, const unsigned* count, unsigned maxSymbolValue, unsigned tableLog);

/*!
Tutorial :
----------
Expand Down Expand Up @@ -317,6 +328,7 @@ If there is an error, the function will return an error code, which can be teste
* FSE advanced API
***************************************** */

unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue);
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/**< same as FSE_optimalTableLog(), which used `minus==2` */

Expand Down
31 changes: 21 additions & 10 deletions lib/common/huf.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ extern "C" {
* Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
* 'dst' buffer must be already allocated.
* Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
* If depthStrategy = HUF_seek_neighbor additional huffman depths will be considered
* `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
* @return : size of compressed data (<= `dstCapacity`).
* Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
Expand Down Expand Up @@ -79,23 +80,30 @@ HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error c

/* *** Advanced function *** */

typedef enum {
HUF_default,
HUF_seek_neighbors
} HUF_depthStrategy;

/** HUF_compress2() :
* Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */
* `tableLog` must be `<= HUF_TABLELOG_MAX` .
* If depthStrategy = HUF_seek_neighbor additional huffman depths will be considered */
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog);
unsigned maxSymbolValue, unsigned tableLog, HUF_depthStrategy depthStrategy);

/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
* `workspace` must be at least as large as HUF_WORKSPACE_SIZE
* If depthStrategy = HUF_seek_neighbor additional huffman depths will be considered */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize);
HUF_depthStrategy depthStrategy, void* workSpace, size_t wkspSize);

#endif /* HUF_H_298734234 */

Expand Down Expand Up @@ -185,8 +193,10 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
* For example, it's possible to compress several blocks using the same 'CTable',
* or to save and regenerate 'CTable' using external methods.
*/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, const unsigned* count, unsigned maxSymbolValue, HUF_depthStrategy strategy);
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_getEncodedCTableSize (const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_getEncodedCTableSize_wksp (const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
Expand All @@ -204,12 +214,13 @@ typedef enum {
* If it uses hufTable it does not modify hufTable or repeat.
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
* If preferRepeat then the old table will always be used if valid.
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding
* If depthStrategy = HUF_seek_neighbor additional huffman depths will be considered */
size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible, HUF_depthStrategy depthStrategy);

/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
Expand Down Expand Up @@ -301,8 +312,8 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
/* single stream variants */
/* ====================== */

size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, HUF_depthStrategy depthStrategy);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, HUF_depthStrategy depthStrategy,void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
/** HUF_compress1X_repeat() :
Expand All @@ -315,7 +326,7 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible, HUF_depthStrategy depthStrategy);

size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
Expand Down
59 changes: 58 additions & 1 deletion lib/compress/fse_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,48 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
}

size_t FSE_estimateNCountSize(const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
int nbBits;
const int tableSize = 1 << tableLog;
int remaining;
int threshold;
int bitCount = 0;
unsigned symbol = 0;
unsigned const alphabetSize = maxSymbolValue + 1;
int previousIs0 = 0;

/* Table Size */
bitCount += 4;

/* Init */
remaining = tableSize + 1; /* +1 for extra accuracy */
threshold = tableSize;
nbBits = tableLog + 1;

while ((symbol < alphabetSize) && (remaining > 1)) { /* stops at 1 */
if (previousIs0) {
unsigned start = symbol;
while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
if (symbol == alphabetSize) break; /* incorrect distribution */
bitCount += ((symbol - start) / 24) * 16;
bitCount += (((symbol - start) % 24 + 2) / 3) * 2;
}
{ int count = normalizedCounter[symbol++];
int const max = (2 * threshold - 1) - remaining;
remaining -= count < 0 ? -count : count;
count++; /* +1 for extra accuracy */
if (count >= threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
bitCount += nbBits;
bitCount -= (count < max);
previousIs0 = (count == 1);
while (remaining < threshold) { nbBits--; threshold >>= 1; }
}
}

return (bitCount + 7) / 8;
}

/*-**************************************************************
* FSE Compression Code
Expand All @@ -353,7 +395,7 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }

/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
Expand Down Expand Up @@ -589,6 +631,21 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
return 0;
}

size_t FSE_estimateCompressedSize(const FSE_CTable* ct, const unsigned* count, unsigned maxSymbolValue, unsigned tableLog)
{
U32 const tableSize = 1 << tableLog;
const void* const ptr = ct;
const void* const FSCT = ((const U32*)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
const FSE_symbolCompressionTransform* const symbolTT = (const FSE_symbolCompressionTransform*)(FSCT);

size_t nbBits = 0;
U32 s;
for (s = 0; s <= maxSymbolValue; ++s) {
nbBits += FSE_bitCost(symbolTT, tableLog, s, 8 /* accuracyLog */) * count[s];
}
return nbBits >> (8 + 3); /* accuracyLog + bit->byte */
}


static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
const void* src, size_t srcSize,
Expand Down
Loading