Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 72 additions & 8 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,15 @@ static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) {
}
}

/* ZSTD_resolveExternalRepcodeSearch() :
 * Resolves the repcode-search switch used when parsing external sequences.
 * Any explicit setting (i.e. anything other than ZSTD_ps_auto) is returned unchanged.
 * In auto mode, the result depends only on the compression level :
 * ZSTD_ps_disable for levels below 10, ZSTD_ps_enable for level 10 and above. */
static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) {
    if (value == ZSTD_ps_auto) {
        /* auto : pick a concrete mode from the compression level */
        return (cLevel >= 10) ? ZSTD_ps_enable : ZSTD_ps_disable;
    }
    return value;
}

/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
Expand All @@ -315,6 +324,8 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);
cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes,
cctxParams.compressionLevel);
assert(!ZSTD_checkCParams(cParams));
return cctxParams;
}
Expand Down Expand Up @@ -381,6 +392,7 @@ ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
}
Expand Down Expand Up @@ -613,6 +625,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
return bounds;

case ZSTD_c_searchForExternalRepcodes:
bounds.lowerBound = (int)ZSTD_ps_auto;
bounds.upperBound = (int)ZSTD_ps_disable;
return bounds;

default:
bounds.error = ERROR(parameter_unsupported);
return bounds;
Expand Down Expand Up @@ -680,6 +697,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_prefetchCDictTables:
case ZSTD_c_enableMatchFinderFallback:
case ZSTD_c_maxBlockSize:
case ZSTD_c_searchForExternalRepcodes:
default:
return 0;
}
Expand Down Expand Up @@ -738,6 +756,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_prefetchCDictTables:
case ZSTD_c_enableMatchFinderFallback:
case ZSTD_c_maxBlockSize:
case ZSTD_c_searchForExternalRepcodes:
break;

default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
Expand Down Expand Up @@ -981,6 +1000,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
CCtxParams->maxBlockSize = value;
return CCtxParams->maxBlockSize;

case ZSTD_c_searchForExternalRepcodes:
BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value);
CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value;
return CCtxParams->searchForExternalRepcodes;

default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
Expand Down Expand Up @@ -1122,6 +1146,9 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_maxBlockSize:
*value = (int)CCtxParams->maxBlockSize;
break;
case ZSTD_c_searchForExternalRepcodes:
*value = (int)CCtxParams->searchForExternalRepcodes;
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
Expand Down Expand Up @@ -3184,7 +3211,8 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
zc, &seqPos,
zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
src, srcSize
src, srcSize,
zc->appliedParams.searchForExternalRepcodes
),
"Failed to copy external sequences to seqStore!"
);
Expand Down Expand Up @@ -6000,6 +6028,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);
params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel);

#ifdef ZSTD_MULTITHREAD
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
Expand Down Expand Up @@ -6259,9 +6288,11 @@ size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize)
const void* src, size_t blockSize,
ZSTD_paramSwitch_e externalRepSearch)
{
U32 idx = seqPos->idx;
U32 const startIdx = idx;
BYTE const* ip = (BYTE const*)(src);
const BYTE* const iend = ip + blockSize;
repcodes_t updatedRepcodes;
Expand All @@ -6279,10 +6310,16 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
U32 const litLength = inSeqs[idx].litLength;
U32 const ll0 = (litLength == 0);
U32 const matchLength = inSeqs[idx].matchLength;
U32 const offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
U32 offBase;

if (externalRepSearch == ZSTD_ps_disable) {
offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);
} else {
U32 const ll0 = (litLength == 0);
offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
}

DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
if (cctx->appliedParams.validateSequences) {
Expand All @@ -6296,6 +6333,30 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
ip += matchLength + litLength;
}

/* If we skipped repcode search while parsing, we need to update repcodes now */
assert(externalRepSearch != ZSTD_ps_auto);
assert(idx >= startIdx);
if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) {
U32* const rep = updatedRepcodes.rep;
U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */

if (lastSeqIdx >= startIdx + 2) {
rep[2] = inSeqs[lastSeqIdx - 2].offset;
rep[1] = inSeqs[lastSeqIdx - 1].offset;
rep[0] = inSeqs[lastSeqIdx].offset;
} else if (lastSeqIdx == startIdx + 1) {
rep[2] = rep[0];
rep[1] = inSeqs[lastSeqIdx - 1].offset;
rep[0] = inSeqs[lastSeqIdx].offset;
} else {
assert(lastSeqIdx == startIdx);
rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = inSeqs[lastSeqIdx].offset;
}
}

ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));

if (inSeqs[idx].litLength) {
Expand All @@ -6312,7 +6373,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize)
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch)
{
U32 idx = seqPos->idx;
U32 startPosInSequence = seqPos->posInSequence;
Expand All @@ -6324,6 +6385,9 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
U32 bytesAdjustment = 0;
U32 finalMatchSplit = 0;

/* TODO(embg) support fast parsing mode in noBlockDelim mode */
(void)externalRepSearch;

if (cctx->cdict) {
dictSize = cctx->cdict->dictContentSize;
} else if (cctx->prefixDict.dict) {
Expand Down Expand Up @@ -6431,7 +6495,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*

typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize);
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
{
ZSTD_sequenceCopier sequenceCopier = NULL;
Expand Down Expand Up @@ -6539,7 +6603,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
ZSTD_resetSeqStore(&cctx->seqStore);
DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);

additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes);
FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
blockSize -= additionalByteAdjustment;

Expand Down
7 changes: 5 additions & 2 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,9 @@ struct ZSTD_CCtx_params_s {

/* Adjust the max block size*/
size_t maxBlockSize;

/* Controls repcode search in external sequence parsing */
ZSTD_paramSwitch_e searchForExternalRepcodes;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
Expand Down Expand Up @@ -1453,7 +1456,7 @@ size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize);
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);

/* Returns the number of bytes to move the current read position back by.
* Only non-zero if we ended up splitting a sequence.
Expand All @@ -1470,6 +1473,6 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize);
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);

#endif /* ZSTD_COMPRESS_H */
40 changes: 32 additions & 8 deletions lib/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,8 @@ typedef enum {
ZSTD_c_experimentalParam15=1012,
ZSTD_c_experimentalParam16=1013,
ZSTD_c_experimentalParam17=1014,
ZSTD_c_experimentalParam18=1015
ZSTD_c_experimentalParam18=1015,
ZSTD_c_experimentalParam19=1016
} ZSTD_cParameter;

typedef struct {
Expand Down Expand Up @@ -2126,18 +2127,41 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* documentation (below) before setting this parameter. */
#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17

/* ZSTD_c_maxBlockSize
* Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
* The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
/* ZSTD_c_maxBlockSize
* Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
* The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
*
* This parameter can be used to set an upper bound on the blocksize
* that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
* bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
* compressBound() inaccurate). Only currently meant to be used for testing.
* This parameter can be used to set an upper bound on the blocksize
* that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
* bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
* compressBound() inaccurate). Only currently meant to be used for testing.
*
*/
#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18

/* ZSTD_c_searchForExternalRepcodes
* This parameter affects how zstd parses external sequences, such as sequences
* provided through the compressSequences() API or from an external matchfinder.
*
* If set to ZSTD_ps_enable, the library will check for repeated offsets in
* external sequences, even if those repcodes are not explicitly indicated in
* the "rep" field. Note that this is the only way to exploit repcode matches
* while using compressSequences() or an external matchfinder, since zstd
* currently ignores the "rep" field of external sequences.
*
* If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
* external sequences, regardless of whether the "rep" field has been set. This
* reduces sequence compression overhead by about 25% while sacrificing some
* compression ratio.
*
* The default value is ZSTD_ps_auto, for which the library will enable/disable
* based on compression level.
*
* Note: for now, this param only has an effect if ZSTD_c_blockDelimiters is
* set to ZSTD_sf_explicitBlockDelimiters. That may change in the future.
*/
#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19

/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
Expand Down
1 change: 1 addition & 0 deletions tests/fuzz/zstd_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer);
setRand(cctx, ZSTD_c_searchForExternalRepcodes, 0, 2, producer);
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
}
Expand Down