From e43bf4891067566c6c9e772f6fb0b0479c6c7ff9 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 9 Nov 2022 20:05:03 -0500 Subject: [PATCH 01/60] First building commit with sample matchfinder --- contrib/externalMatchfinder/Makefile | 40 ++++++++++++++ contrib/externalMatchfinder/main.c | 64 +++++++++++++++++++++++ contrib/externalMatchfinder/matchfinder.c | 62 ++++++++++++++++++++++ contrib/externalMatchfinder/matchfinder.h | 12 +++++ lib/compress/zstd_compress.c | 42 ++++++++++++--- lib/compress/zstd_compress_internal.h | 15 ++++++ lib/zstd.h | 18 +++++++ 7 files changed, 246 insertions(+), 7 deletions(-) create mode 100644 contrib/externalMatchfinder/Makefile create mode 100644 contrib/externalMatchfinder/main.c create mode 100644 contrib/externalMatchfinder/matchfinder.c create mode 100644 contrib/externalMatchfinder/matchfinder.h diff --git a/contrib/externalMatchfinder/Makefile b/contrib/externalMatchfinder/Makefile new file mode 100644 index 00000000000..46a5fc2dc08 --- /dev/null +++ b/contrib/externalMatchfinder/Makefile @@ -0,0 +1,40 @@ +# ################################################################ +# Copyright (c) 2018-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +PROGDIR = ../../programs +LIBDIR = ../../lib + +LIBZSTD = $(LIBDIR)/libzstd.a + +CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/compress -I$(LIBDIR)/common + +CFLAGS ?= -O3 +CFLAGS += -std=gnu99 +DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum \ + -Wstrict-prototypes -Wundef -Wpointer-arith \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) + +default: externalMatchfinder + +all: externalMatchfinder + +externalMatchfinder: matchfinder.c main.c $(LIBZSTD) + $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ + +.PHONY: $(LIBZSTD) +$(LIBZSTD): + $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)" + +clean: + $(RM) *.o + $(MAKE) -C $(LIBDIR) clean > /dev/null + $(RM) externalMatchfinder diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c new file mode 100644 index 00000000000..6ffe282979b --- /dev/null +++ b/contrib/externalMatchfinder/main.c @@ -0,0 +1,64 @@ +#include +#include +#include + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#include "matchfinder.h" // simpleExternalMatchfinder + +const size_t ZSTD_LEVEL = 1; + +int main(int argc, char *argv[]) { + ZSTD_CCtx* zc = ZSTD_createCCtx(); + + // Here is the crucial bit of code! + ZSTD_registerExternalMatchfinder(zc, simpleExternalMatchfinder); + + if (argc != 2) { + printf("Usage: exampleMatchfinder \n"); + return 1; + } + + FILE *f = fopen(argv[1], "rb"); + fseek(f, 0, SEEK_END); + long srcSize = ftell(f); + fseek(f, 0, SEEK_SET); + + char *src = malloc(srcSize + 1); + fread(src, srcSize, 1, f); + fclose(f); + + size_t dstSize = ZSTD_compressBound(srcSize); + char *dst = malloc(dstSize); + + size_t cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); + + if (ZSTD_isError(cSize)) { + printf("ERROR: %lu\n", cSize); + return 1; + } + + char *val = malloc(srcSize); + size_t res = ZSTD_decompress(val, srcSize, dst, cSize); + + if (ZSTD_isError(res)) { + printf("ERROR: %lu\n", cSize); + return 1; + } + + if (memcmp(src, val, srcSize) == 0) { + printf("Compression and decompression were successful!\n"); + printf("Original size: %lu\n", srcSize); + printf("Compressed size: %lu\n", cSize); + return 0; + } else { + printf("ERROR: input and validation buffers don't match!\n"); + for (int i = 0; i < srcSize; i++) { + if (src[i] != val[i]) { + printf("First bad index: %d\n", i); + break; + } + } + return 1; + } +} diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c new file mode 100644 index 00000000000..a67cdad482a --- /dev/null +++ b/contrib/externalMatchfinder/matchfinder.c @@ -0,0 +1,62 @@ +#include "zstd_compress_internal.h" +#include "matchfinder.h" +#include + +#define HSIZE 1024 +static U32 const HLOG = 10; +static U32 const MLS = 4; +static U32 const BADIDX = (1 << 31); + +size_t simpleExternalMatchfinder( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, size_t historySize +) { + (void)(externalMatchState); + (void)(historySize); + (void)(outSeqsCapacity); // @nocommit return an error + + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + const BYTE* anchor = istart; + + size_t seqCount = 0; + + U32 hashTable[HSIZE]; + for (int i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } + + while (ip + 4 < iend) { + size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); + U32 const matchIndex = hashTable[hash]; + hashTable[hash] = ip - istart; + + if (matchIndex != BADIDX) { + const BYTE* const match = istart + matchIndex; + size_t const matchLen = ZSTD_count(ip, match, iend); + if (matchLen >= ZSTD_MINMATCH_MIN) { + U32 const litLen = ip - anchor; + U32 const offset = ip - match; + ZSTD_Sequence const seq = { + offset, litLen, matchLen, 0 + }; + outSeqs[seqCount++] = seq; + ip += matchLen; + anchor = ip; + continue; + } + } + + ip++; + } + + { + ZSTD_Sequence const finalSeq = { + 0, iend - anchor, 0, 0 + }; + outSeqs[seqCount] = finalSeq; + } + + return seqCount; +} diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h new file mode 100644 index 00000000000..ad2c1b2b6e8 --- /dev/null +++ b/contrib/externalMatchfinder/matchfinder.h @@ -0,0 +1,12 @@ +#ifndef MATCHFINDER_H +#define MATCHFINDER_H + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +size_t simpleExternalMatchfinder( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, size_t historySize +); + +#endif diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4cf2c09456e..10c8bbf2f7a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -28,6 +28,7 @@ #include "zstd_ldm.h" #include "zstd_compress_superblock.h" #include "../common/bits.h" /* ZSTD_highbit32 */ +#include /* *************************************************************** * Tuning parameters @@ -2870,6 +2871,14 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; +// @nocommit External matchfinder, need to move this at some point +ZSTD_Sequence globalSeqs[100000]; // @nocommit + +void ZSTD_registerExternalMatchfinder(ZSTD_CCtx* zc, ZSTD_externalMatchfinder_F* externalBlockMatchfinder) { + // @nocommit Check on how to make this sticky + zc->externalBlockMatchfinder = externalBlockMatchfinder; +} + static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; @@ -2940,6 +2949,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); + } else if (1 /* @nocommit: change to a cparam */) { + size_t numSeqsFound = (zc->externalBlockMatchfinder)( + NULL, globalSeqs, 100000, src, srcSize, 0 + ); + ZSTD_sequencePosition seqPos = {0,0,0}; + lastLLSize = 0; // @nocommit + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, globalSeqs, numSeqsFound, src, srcSize + ); } else { /* not long range mode */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, @@ -3863,6 +3881,22 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, return cSize; } +// @nocommit +// static size_t ZSTD_buildSeqStoreUsingExternalMatchfinder( +// ZSTD_CCtx* zc, const void* src, size_t srcSize +// ) { +// if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { +// if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { +// ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); +// } else { +// ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); +// } +// return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ +// } +// ZSTD_resetSeqStore(&(zc->seqStore)); +// return 0; +// } + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, @@ -5927,12 +5961,6 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, } } -typedef struct { - U32 idx; /* Index in array of ZSTD_Sequence */ - U32 posInSequence; /* Position within sequence at idx */ - size_t posInSrc; /* Number of bytes given by sequences provided so far */ -} ZSTD_sequencePosition; - /* ZSTD_validateSequence() : * @offCode : is presumed to follow format required by ZSTD_storeSeq() * @returns a ZSTD error code if sequence is not valid @@ -5973,7 +6001,7 @@ static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. */ -static size_t +size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index e4bb2f5372c..274879028e0 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -150,6 +150,18 @@ typedef struct { size_t capacity; /* The capacity starting from `seq` pointer */ } rawSeqStore_t; +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); + UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; typedef struct { @@ -440,6 +452,9 @@ struct ZSTD_CCtx_s { /* Workspace for block splitter */ ZSTD_blockSplitCtx blockSplitCtx; + + /* External block matchfinder */ + ZSTD_externalMatchfinder_F* externalBlockMatchfinder; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; diff --git a/lib/zstd.h b/lib/zstd.h index dd72e17ed76..e66bae75300 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1508,6 +1508,24 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize); +typedef size_t ZSTD_externalMatchfinder_F ( + void* externalMatchState, // TODO make this an externalMatchState type + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + // outSeqsCapacity >= blockSize / MINMATCH + const void* src, size_t srcSize, size_t historySize + // srcSize - historySize <= 128 KB + // historySize < srcSize ; any size +); + +// @nocommit document this +ZSTDLIB_STATIC_API void +ZSTD_registerExternalMatchfinder(ZSTD_CCtx* cctx, ZSTD_externalMatchfinder_F* externalBlockMatchfinder); + +// @nocommit +size_t DONT_COMMIT_simpleExternalMatchfinder( + void* matchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, size_t historySize +); /*! ZSTD_writeSkippableFrame() : * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. From 625bf62fa6499e5c183d291d7096187795479038 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 18:54:17 -0700 Subject: [PATCH 02/60] Set up ZSTD_externalMatchCtx struct --- contrib/externalMatchfinder/main.c | 14 +++++-- contrib/externalMatchfinder/matchfinder.c | 6 ++- contrib/externalMatchfinder/matchfinder.h | 4 +- lib/compress/zstd_compress.c | 46 +++++++++++------------ lib/compress/zstd_compress_internal.h | 12 +++++- lib/zstd.h | 24 ++++++++---- 6 files changed, 68 insertions(+), 38 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 6ffe282979b..2d6c547dbd3 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -4,7 +4,8 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -#include "matchfinder.h" // simpleExternalMatchfinder +#include "zstd_errors.h" +#include "matchfinder.h" // simpleExternalMatchFinder, simpleExternalMatchStateDestructor const size_t ZSTD_LEVEL = 1; @@ -12,7 +13,12 @@ int main(int argc, char *argv[]) { ZSTD_CCtx* zc = ZSTD_createCCtx(); // Here is the crucial bit of code! - ZSTD_registerExternalMatchfinder(zc, simpleExternalMatchfinder); + ZSTD_registerExternalMatchfinder( + zc, + NULL, + simpleExternalMatchFinder, + simpleExternalMatchStateDestructor + ); if (argc != 2) { printf("Usage: exampleMatchfinder \n"); @@ -34,7 +40,7 @@ int main(int argc, char *argv[]) { size_t cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); if (ZSTD_isError(cSize)) { - printf("ERROR: %lu\n", cSize); + printf("ERROR: %s\n", ZSTD_getErrorString(cSize)); return 1; } @@ -42,7 +48,7 @@ int main(int argc, char *argv[]) { size_t res = ZSTD_decompress(val, srcSize, dst, cSize); if (ZSTD_isError(res)) { - printf("ERROR: %lu\n", cSize); + printf("ERROR: %s\n", ZSTD_getErrorString(res)); return 1; } diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index a67cdad482a..b8237f8d660 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -7,7 +7,7 @@ static U32 const HLOG = 10; static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); -size_t simpleExternalMatchfinder( +size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, size_t historySize ) { @@ -60,3 +60,7 @@ size_t simpleExternalMatchfinder( return seqCount; } + +void simpleExternalMatchStateDestructor(void* externalMatchState) { + (void)externalMatchState; +} diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index ad2c1b2b6e8..74041e25265 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -4,9 +4,11 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -size_t simpleExternalMatchfinder( +size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, size_t historySize ); +void simpleExternalMatchStateDestructor(void* matchState); + #endif diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 10c8bbf2f7a..5293476dd1e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -28,6 +28,8 @@ #include "zstd_ldm.h" #include "zstd_compress_superblock.h" #include "../common/bits.h" /* ZSTD_highbit32 */ +#define ZSTD_DEPS_NEED_MALLOC +#include "../common/zstd_deps.h" /* ZSTD_malloc */ #include /* *************************************************************** @@ -2872,11 +2874,23 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; // @nocommit External matchfinder, need to move this at some point -ZSTD_Sequence globalSeqs[100000]; // @nocommit - -void ZSTD_registerExternalMatchfinder(ZSTD_CCtx* zc, ZSTD_externalMatchfinder_F* externalBlockMatchfinder) { - // @nocommit Check on how to make this sticky - zc->externalBlockMatchfinder = externalBlockMatchfinder; +void ZSTD_registerExternalMatchfinder( + ZSTD_CCtx* zc, void* mState, + ZSTD_externalMatchFinder_F* mFinder, + ZSTD_externalMatchStateDestructor_F* mStateDestructor +) { + // @nocommit Call clearExternalMatchContext() + size_t const seqBufferSize = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence); + void* seqBuffer = ZSTD_malloc(seqBufferSize); + + ZSTD_externalMatchCtx emctx = { + mState, + mFinder, + mStateDestructor, + seqBuffer, + seqBufferSize + }; + zc->externalMatchCtx = emctx; } static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -2950,13 +2964,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else if (1 /* @nocommit: change to a cparam */) { - size_t numSeqsFound = (zc->externalBlockMatchfinder)( - NULL, globalSeqs, 100000, src, srcSize, 0 + size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( + NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferSize, src, srcSize, 0 ); ZSTD_sequencePosition seqPos = {0,0,0}; lastLLSize = 0; // @nocommit ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, globalSeqs, numSeqsFound, src, srcSize + zc, &seqPos, zc->externalMatchCtx.seqBuffer, numSeqsFound, src, srcSize ); } else { /* not long range mode */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, @@ -3881,22 +3895,6 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, return cSize; } -// @nocommit -// static size_t ZSTD_buildSeqStoreUsingExternalMatchfinder( -// ZSTD_CCtx* zc, const void* src, size_t srcSize -// ) { -// if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { -// if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { -// ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); -// } else { -// ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); -// } -// return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ -// } -// ZSTD_resetSeqStore(&(zc->seqStore)); -// return 0; -// } - static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 274879028e0..02ba51954d3 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -383,6 +383,16 @@ typedef struct { ZSTD_entropyCTablesMetadata_t entropyMetadata; } ZSTD_blockSplitCtx; +/* Block-level sequence compression API */ +// @nocommit improve docs +typedef struct { + void* mState; + ZSTD_externalMatchFinder_F* mFinder; + ZSTD_externalMatchStateDestructor_F* mStateDestructor; + void* seqBuffer; // @nocommit change from void* to ZSTD_Sequence* + size_t seqBufferSize; +} ZSTD_externalMatchCtx; + struct ZSTD_CCtx_s { ZSTD_compressionStage_e stage; int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ @@ -454,7 +464,7 @@ struct ZSTD_CCtx_s { ZSTD_blockSplitCtx blockSplitCtx; /* External block matchfinder */ - ZSTD_externalMatchfinder_F* externalBlockMatchfinder; + ZSTD_externalMatchCtx externalMatchCtx; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; diff --git a/lib/zstd.h b/lib/zstd.h index e66bae75300..7f86a6bf70e 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1508,7 +1508,11 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize); -typedef size_t ZSTD_externalMatchfinder_F ( +/* Block-level sequence compression API */ +// @nocommit improve docs + +// @nocommit move bounds comments into docs +typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, // TODO make this an externalMatchState type ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH @@ -1517,16 +1521,21 @@ typedef size_t ZSTD_externalMatchfinder_F ( // historySize < srcSize ; any size ); +typedef void ZSTD_externalMatchStateDestructor_F ( + void* externalMatchState +); + // @nocommit document this ZSTDLIB_STATIC_API void -ZSTD_registerExternalMatchfinder(ZSTD_CCtx* cctx, ZSTD_externalMatchfinder_F* externalBlockMatchfinder); - -// @nocommit -size_t DONT_COMMIT_simpleExternalMatchfinder( - void* matchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, size_t historySize +ZSTD_registerExternalMatchfinder( + ZSTD_CCtx* cctx, + void* externalMatchState, + ZSTD_externalMatchFinder_F* externalMatchFinder, + ZSTD_externalMatchStateDestructor_F* externalMatchStateDestructor ); +/****************************************/ + /*! ZSTD_writeSkippableFrame() : * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. * @@ -2694,6 +2703,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ #if defined (__cplusplus) From a3c5c2b0bd44c78e600bfc15a2c8cfdff52d0dcc Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 19:07:12 -0700 Subject: [PATCH 03/60] move seqBuffer to ZSTD_Sequence* --- lib/compress/zstd_compress.c | 8 ++++---- lib/compress/zstd_compress_internal.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 5293476dd1e..99065f8c262 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2880,15 +2880,15 @@ void ZSTD_registerExternalMatchfinder( ZSTD_externalMatchStateDestructor_F* mStateDestructor ) { // @nocommit Call clearExternalMatchContext() - size_t const seqBufferSize = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence); - void* seqBuffer = ZSTD_malloc(seqBufferSize); + size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + ZSTD_Sequence* seqBuffer = (ZSTD_Sequence*)ZSTD_malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); ZSTD_externalMatchCtx emctx = { mState, mFinder, mStateDestructor, seqBuffer, - seqBufferSize + seqBufferCapacity }; zc->externalMatchCtx = emctx; } @@ -2965,7 +2965,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(ldmSeqStore.pos == ldmSeqStore.size); } else if (1 /* @nocommit: change to a cparam */) { size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( - NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferSize, src, srcSize, 0 + NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, 0 ); ZSTD_sequencePosition seqPos = {0,0,0}; lastLLSize = 0; // @nocommit diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 02ba51954d3..d06d42e32e9 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -389,8 +389,8 @@ typedef struct { void* mState; ZSTD_externalMatchFinder_F* mFinder; ZSTD_externalMatchStateDestructor_F* mStateDestructor; - void* seqBuffer; // @nocommit change from void* to ZSTD_Sequence* - size_t seqBufferSize; + ZSTD_Sequence* seqBuffer; + size_t seqBufferCapacity; } ZSTD_externalMatchCtx; struct ZSTD_CCtx_s { From 201588a802f72fb7a8fc3c884ea7947b42e85906 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 19:12:11 -0700 Subject: [PATCH 04/60] support non-contiguous dictionary --- contrib/externalMatchfinder/matchfinder.c | 5 +++-- contrib/externalMatchfinder/matchfinder.h | 2 +- lib/compress/zstd_compress.c | 3 ++- lib/zstd.h | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index b8237f8d660..c974791b69f 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -9,10 +9,11 @@ static U32 const BADIDX = (1 << 31); size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, size_t historySize + const void* src, size_t srcSize, const void* dict, size_t dictSize ) { (void)(externalMatchState); - (void)(historySize); + (void)(dict); + (void)(dictSize); (void)(outSeqsCapacity); // @nocommit return an error const BYTE* const istart = (const BYTE*)src; diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 74041e25265..5ad0909c6b2 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -6,7 +6,7 @@ size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, size_t historySize + const void* src, size_t srcSize, const void* dict, size_t dictSize ); void simpleExternalMatchStateDestructor(void* matchState); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 99065f8c262..4445cf05390 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2964,8 +2964,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else if (1 /* @nocommit: change to a cparam */) { + // @nocommit document lack of dictionary support in function call size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( - NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, 0 + NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 ); ZSTD_sequencePosition seqPos = {0,0,0}; lastLLSize = 0; // @nocommit diff --git a/lib/zstd.h b/lib/zstd.h index 7f86a6bf70e..7713c57dcfe 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1516,7 +1516,8 @@ typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, // TODO make this an externalMatchState type ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH - const void* src, size_t srcSize, size_t historySize + const void* src, size_t srcSize, + const void* dict, size_t dictSize // srcSize - historySize <= 128 KB // historySize < srcSize ; any size ); From 5a01759f0468a14c1f229ba1aa8e0d0ee4207c9f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 19:13:32 -0700 Subject: [PATCH 05/60] clean up parens --- contrib/externalMatchfinder/matchfinder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index c974791b69f..aa20aacbe73 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -11,10 +11,10 @@ size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ) { - (void)(externalMatchState); - (void)(dict); - (void)(dictSize); - (void)(outSeqsCapacity); // @nocommit return an error + (void)externalMatchState; + (void)dict; + (void)dictSize; + (void)outSeqsCapacity; // @nocommit return an error const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; From c6c5b5b37e0abca2cd784790e8249feb595db13d Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 19:53:32 -0700 Subject: [PATCH 06/60] add clearExternalMatchfinder, handle allocation errors --- contrib/externalMatchfinder/main.c | 17 +++++++++++++---- lib/compress/zstd_compress.c | 24 ++++++++++++++++++++++-- lib/zstd.h | 12 +++++++++--- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 2d6c547dbd3..0133bdcae66 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -10,18 +10,25 @@ const size_t ZSTD_LEVEL = 1; int main(int argc, char *argv[]) { + size_t res; + + if (argc != 2) { + printf("Usage: exampleMatchfinder \n"); + return 1; + } + ZSTD_CCtx* zc = ZSTD_createCCtx(); // Here is the crucial bit of code! - ZSTD_registerExternalMatchfinder( + res = ZSTD_registerExternalMatchFinder( zc, NULL, simpleExternalMatchFinder, simpleExternalMatchStateDestructor ); - if (argc != 2) { - printf("Usage: exampleMatchfinder \n"); + if (ZSTD_isError(res)) { + printf("ERROR: %s\n", ZSTD_getErrorString(res)); return 1; } @@ -45,7 +52,9 @@ int main(int argc, char *argv[]) { } char *val = malloc(srcSize); - size_t res = ZSTD_decompress(val, srcSize, dst, cSize); + res = ZSTD_decompress(val, srcSize, dst, cSize); + + ZSTD_freeCCtx(zc); if (ZSTD_isError(res)) { printf("ERROR: %s\n", ZSTD_getErrorString(res)); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4445cf05390..2480dbd9e66 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -173,6 +173,7 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) assert(cctx != NULL); assert(cctx->staticSize == 0); ZSTD_clearAllDicts(cctx); + ZSTD_clearExternalMatchFinder(cctx); #ifdef ZSTD_MULTITHREAD ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; #endif @@ -2874,14 +2875,18 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; // @nocommit External matchfinder, need to move this at some point -void ZSTD_registerExternalMatchfinder( +size_t ZSTD_registerExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder, ZSTD_externalMatchStateDestructor_F* mStateDestructor ) { - // @nocommit Call clearExternalMatchContext() + ZSTD_clearExternalMatchFinder(zc); + size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); ZSTD_Sequence* seqBuffer = (ZSTD_Sequence*)ZSTD_malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); + if (seqBuffer == NULL) { + return ZSTD_error_memory_allocation; + } ZSTD_externalMatchCtx emctx = { mState, @@ -2891,6 +2896,21 @@ void ZSTD_registerExternalMatchfinder( seqBufferCapacity }; zc->externalMatchCtx = emctx; + + return ZSTD_error_no_error; +} + +void ZSTD_clearExternalMatchFinder( + ZSTD_CCtx* zc +) { + ZSTD_externalMatchCtx const emctx = zc->externalMatchCtx; + if (emctx.mFinder != NULL) { + if (emctx.mStateDestructor != NULL) { + (emctx.mStateDestructor)(emctx.mState); + } + ZSTD_free(emctx.seqBuffer); + ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx)); + } } static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) diff --git a/lib/zstd.h b/lib/zstd.h index 7713c57dcfe..cfca29ad49b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1526,15 +1526,21 @@ typedef void ZSTD_externalMatchStateDestructor_F ( void* externalMatchState ); -// @nocommit document this -ZSTDLIB_STATIC_API void -ZSTD_registerExternalMatchfinder( +ZSTDLIB_STATIC_API size_t +ZSTD_registerExternalMatchFinder( ZSTD_CCtx* cctx, void* externalMatchState, ZSTD_externalMatchFinder_F* externalMatchFinder, ZSTD_externalMatchStateDestructor_F* externalMatchStateDestructor ); +/* Note: calls the previously-registered ZSTD_externalMatchStateDestructor_F* + * if it is non-NULL. */ +ZSTDLIB_STATIC_API void +ZSTD_clearExternalMatchFinder( + ZSTD_CCtx* cctx +); + /****************************************/ /*! ZSTD_writeSkippableFrame() : From 462e967fc9f2c3b82d77dbf87fcbf1349e067569 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 22 Nov 2022 21:48:56 -0500 Subject: [PATCH 07/60] Add useExternalMatchfinder cParam --- contrib/externalMatchfinder/main.c | 7 +++++++ lib/compress/zstd_compress.c | 17 ++++++++++++++++- lib/compress/zstd_compress_internal.h | 3 +++ lib/zstd.h | 7 ++++++- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 0133bdcae66..c831ee8da34 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -32,6 +32,13 @@ int main(int argc, char *argv[]) { return 1; } + res = ZSTD_CCtx_setParameter(zc, ZSTD_c_useExternalMatchfinder, 1); + + if (ZSTD_isError(res)) { + printf("ERROR: %s\n", ZSTD_getErrorString(res)); + return 1; + } + FILE *f = fopen(argv[1], "rb"); fseek(f, 0, SEEK_END); long srcSize = ftell(f); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2480dbd9e66..adb30381f87 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -588,6 +588,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; + case ZSTD_c_useExternalMatchfinder: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -653,6 +658,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: + case ZSTD_c_useExternalMatchfinder: default: return 0; } @@ -709,6 +715,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: + case ZSTD_c_useExternalMatchfinder: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -941,6 +948,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; return CCtxParams->prefetchCDictTables; + case ZSTD_c_useExternalMatchfinder: + BOUNDCHECK(ZSTD_c_useExternalMatchfinder, value); + CCtxParams->useExternalMatchfinder = value; + return CCtxParams->useExternalMatchfinder; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -1076,6 +1088,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_prefetchCDictTables: *value = (int)CCtxParams->prefetchCDictTables; break; + case ZSTD_c_useExternalMatchfinder: + *value = CCtxParams->useExternalMatchfinder; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -2983,7 +2998,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (1 /* @nocommit: change to a cparam */) { + } else if (zc->appliedParams.useExternalMatchfinder) { // @nocommit document lack of dictionary support in function call size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index d06d42e32e9..62b9a79fb33 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -352,6 +352,9 @@ struct ZSTD_CCtx_params_s { /* Controls prefetching in some dictMatchState matchfinders */ ZSTD_paramSwitch_e prefetchCDictTables; + + /* Controls block-level sequence compression API */ + int useExternalMatchfinder; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) diff --git a/lib/zstd.h b/lib/zstd.h index cfca29ad49b..362317ecb71 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -478,6 +478,7 @@ typedef enum { * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables + * ZSTD_c_useExternalMatchfinder * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -497,7 +498,8 @@ typedef enum { ZSTD_c_experimentalParam13=1010, ZSTD_c_experimentalParam14=1011, ZSTD_c_experimentalParam15=1012, - ZSTD_c_experimentalParam16=1013 + ZSTD_c_experimentalParam16=1013, + ZSTD_c_experimentalParam17=1014 } ZSTD_cParameter; typedef struct { @@ -2078,6 +2080,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 +// @nocommit document +#define ZSTD_c_useExternalMatchfinder ZSTD_c_experimentalParam17 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. From a3a5f3d1e360552896216e25638060f4ab897484 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 10:55:43 -0500 Subject: [PATCH 08/60] validate useExternalMatchfinder cParam --- contrib/externalMatchfinder/main.c | 8 ++++---- lib/compress/zstd_compress.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index c831ee8da34..eaba605b6af 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -28,14 +28,14 @@ int main(int argc, char *argv[]) { ); if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorString(res)); + printf("ERROR: %s\n", ZSTD_getErrorName(res)); return 1; } res = ZSTD_CCtx_setParameter(zc, ZSTD_c_useExternalMatchfinder, 1); if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorString(res)); + printf("ERROR: %s\n", ZSTD_getErrorName(res)); return 1; } @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { size_t cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); if (ZSTD_isError(cSize)) { - printf("ERROR: %s\n", ZSTD_getErrorString(cSize)); + printf("ERROR: %s\n", ZSTD_getErrorName(cSize)); return 1; } @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) { ZSTD_freeCCtx(zc); if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorString(res)); + printf("ERROR: %s\n", ZSTD_getErrorName(res)); return 1; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index adb30381f87..df5d2cf1f5a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -681,6 +681,14 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) "MT not compatible with static alloc"); break; + case ZSTD_c_useExternalMatchfinder: + RETURN_ERROR_IF( + (value == 1) && (cctx->externalMatchCtx.mFinder == NULL), + parameter_unsupported, + "Must register an external matchfinder to set useExternalMatchfinder=1" + ); + break; + case ZSTD_c_compressionLevel: case ZSTD_c_windowLog: case ZSTD_c_hashLog: @@ -715,7 +723,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_useExternalMatchfinder: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -2982,6 +2989,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { + // @nocommit Check for LDM, fail if useExternalMatchfinder is set rawSeqStore_t ldmSeqStore = kNullRawSeqStore; ldmSeqStore.seq = zc->ldmSequences; @@ -3000,6 +3008,10 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(ldmSeqStore.pos == ldmSeqStore.size); } else if (zc->appliedParams.useExternalMatchfinder) { // @nocommit document lack of dictionary support in function call + RETURN_ERROR_IF( + zc->externalMatchCtx.mFinder == NULL, parameter_unsupported, + "useExternalMatchfinder=1 but no external matchfinder is registered!" + ); size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 ); From 8562b10b2dc16fadc9d0bb4fd1acf467ec65a4df Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 15:36:55 -0500 Subject: [PATCH 09/60] Disable LDM + external matchfinder --- lib/compress/zstd_compress.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index df5d2cf1f5a..d83b78783b9 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2942,6 +2942,16 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(srcSize <= ZSTD_BLOCKSIZE_MAX); /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + (zc->appliedParams.useExternalMatchfinder == 1) && + (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable), + parameter_unsupported, // @nocommit Make this a parameter *combination* error + "Long-distance matching with external matchfinder enabled is not currently supported." + ); + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding * additional 1. We need to revisit and change this logic to be more consistent */ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { @@ -2989,7 +2999,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { - // @nocommit Check for LDM, fail if useExternalMatchfinder is set rawSeqStore_t ldmSeqStore = kNullRawSeqStore; ldmSeqStore.seq = zc->ldmSequences; From 6488d8d12899846eb464af163896c207dfc6af57 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 16:58:53 -0500 Subject: [PATCH 10/60] Check for static CCtx --- lib/compress/zstd_compress.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d83b78783b9..ce3a3604b7c 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2902,6 +2902,11 @@ size_t ZSTD_registerExternalMatchFinder( ZSTD_externalMatchFinder_F* mFinder, ZSTD_externalMatchStateDestructor_F* mStateDestructor ) { + RETURN_ERROR_IF( + zc->staticSize, parameter_unsupported, + "External matchfinder is not compatible with static alloc" + ); + ZSTD_clearExternalMatchFinder(zc); size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); From 50df82631f9f5aebccffbd305cea80798b25ef01 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 17:18:59 -0500 Subject: [PATCH 11/60] Validate mState and mStateDestructor --- contrib/externalMatchfinder/main.c | 4 +++- lib/compress/zstd_compress.c | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index eaba605b6af..af741245957 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -19,10 +19,12 @@ int main(int argc, char *argv[]) { ZSTD_CCtx* zc = ZSTD_createCCtx(); + int simpleExternalMatchState = 0xdeadbeef; // @nocommit + // Here is the crucial bit of code! res = ZSTD_registerExternalMatchFinder( zc, - NULL, + &simpleExternalMatchState, simpleExternalMatchFinder, simpleExternalMatchStateDestructor ); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ce3a3604b7c..671be8126ae 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2907,6 +2907,12 @@ size_t ZSTD_registerExternalMatchFinder( "External matchfinder is not compatible with static alloc" ); + RETURN_ERROR_IF( + (mState && !mStateDestructor) || (!mState && mStateDestructor), + GENERIC, + "Cannot provide external match state without corresponding destructor, or vice versa!" + ); + ZSTD_clearExternalMatchFinder(zc); size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); From f03da10f7af92c1cc3c558d145ebc0d4177fa41b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 23:28:35 -0500 Subject: [PATCH 12/60] Improve LDM check to cover both branches --- lib/compress/zstd_compress.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 671be8126ae..6ba37be3857 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2954,15 +2954,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - /* External matchfinder + LDM is technically possible, just not implemented yet. - * We need to revisit soon and implement it. */ - RETURN_ERROR_IF( - (zc->appliedParams.useExternalMatchfinder == 1) && - (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable), - parameter_unsupported, // @nocommit Make this a parameter *combination* error - "Long-distance matching with external matchfinder enabled is not currently supported." - ); - /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding * additional 1. We need to revisit and change this logic to be more consistent */ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { @@ -3001,6 +2992,24 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } if (zc->externSeqStore.pos < zc->externSeqStore.size) { assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); + + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchfinder == 1, + parameter_unsupported, // @nocommit Make this a parameter *combination* error? + "Block splitting with external matchfinder enabled is not currently supported. " + "Note: block splitting is enabled by default at high compression levels." + ); + } else { + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchfinder == 1, + parameter_unsupported, // @nocommit Make this a parameter *combination* error? + "Long-distance matching with external matchfinder enabled is not currently supported." + ); + } + /* Updates ldmSeqStore.pos */ lastLLSize = ZSTD_ldm_blockCompress(&zc->externSeqStore, @@ -3012,6 +3021,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchfinder == 1, + parameter_unsupported, // @nocommit Make this a parameter *combination* error? + "Long-distance matching with external matchfinder enabled is not currently supported." + ); + ldmSeqStore.seq = zc->ldmSequences; ldmSeqStore.capacity = zc->maxNbLdmSequences; /* Updates ldmSeqStore.size */ From d955d1686642074305f856907557ceb062c812d1 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 23:44:59 -0500 Subject: [PATCH 13/60] Error API with optional fallback --- contrib/externalMatchfinder/main.c | 7 ++++ contrib/externalMatchfinder/matchfinder.c | 15 ++++---- contrib/externalMatchfinder/matchfinder.h | 2 +- lib/common/error_private.c | 1 + lib/compress/zstd_compress.c | 43 +++++++++++++++++++---- lib/compress/zstd_compress_internal.h | 4 +++ lib/zstd.h | 32 +++++++++++++---- lib/zstd_errors.h | 1 + 8 files changed, 83 insertions(+), 22 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index af741245957..a504a956614 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -41,6 +41,13 @@ int main(int argc, char *argv[]) { return 1; } + res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchfinderFallback, 1); + + if (ZSTD_isError(res)) { + printf("ERROR: %s\n", ZSTD_getErrorName(res)); + return 1; + } + FILE *f = fopen(argv[1], "rb"); fseek(f, 0, SEEK_END); long srcSize = ftell(f); diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index aa20aacbe73..c88126f6800 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -7,7 +7,7 @@ static U32 const HLOG = 10; static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); -size_t simpleExternalMatchFinder( +ZSTD_externalMatchResult simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ) { @@ -52,14 +52,13 @@ size_t simpleExternalMatchFinder( ip++; } - { - ZSTD_Sequence const finalSeq = { - 0, iend - anchor, 0, 0 - }; - outSeqs[seqCount] = finalSeq; - } + ZSTD_Sequence const finalSeq = { + 0, iend - anchor, 0, 0 + }; + outSeqs[seqCount] = finalSeq; - return seqCount; + ZSTD_externalMatchResult res = {seqCount, ZSTD_emf_error_none}; + return res; } void simpleExternalMatchStateDestructor(void* externalMatchState) { diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 5ad0909c6b2..18a02efdf61 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -4,7 +4,7 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -size_t simpleExternalMatchFinder( +ZSTD_externalMatchResult simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ); diff --git a/lib/common/error_private.c b/lib/common/error_private.c index fe73c5edd4b..2e81e07a0aa 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -51,6 +51,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(externalMatchFinder_failed): return "External matchfinder returned a non-zero error code"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6ba37be3857..7b7c47bb64e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -593,6 +593,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = 1; return bounds; + case ZSTD_c_enableMatchfinderFallback: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -659,6 +664,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: case ZSTD_c_useExternalMatchfinder: + case ZSTD_c_enableMatchfinderFallback: default: return 0; } @@ -723,6 +729,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: + case ZSTD_c_enableMatchfinderFallback: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -960,6 +967,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->useExternalMatchfinder = value; return CCtxParams->useExternalMatchfinder; + case ZSTD_c_enableMatchfinderFallback: + BOUNDCHECK(ZSTD_c_enableMatchfinderFallback, value); + CCtxParams->enableMatchfinderFallback = value; + return CCtxParams->enableMatchfinderFallback; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -1098,6 +1110,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_useExternalMatchfinder: *value = CCtxParams->useExternalMatchfinder; break; + case ZSTD_c_enableMatchfinderFallback: + *value = CCtxParams->enableMatchfinderFallback; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -3049,14 +3064,30 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->externalMatchCtx.mFinder == NULL, parameter_unsupported, "useExternalMatchfinder=1 but no external matchfinder is registered!" ); - size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( + ZSTD_externalMatchResult matchResult = (zc->externalMatchCtx.mFinder)( NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 ); - ZSTD_sequencePosition seqPos = {0,0,0}; - lastLLSize = 0; // @nocommit - ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, numSeqsFound, src, srcSize - ); + if (matchResult.errorCode == ZSTD_emf_error_none) { + ZSTD_sequencePosition seqPos = {0,0,0}; + ms->ldmSeqStore = NULL; + lastLLSize = 0; + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, zc->externalMatchCtx.seqBuffer, matchResult.nbSeqsFound, src, srcSize + ); + } else { + if (zc->appliedParams.enableMatchfinderFallback) { + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } else { + RETURN_ERROR( + externalMatchFinder_failed, + "External matchfinder returned a non-zero error code!" + ); + } + } } else { /* not long range mode */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 62b9a79fb33..d962e2785dc 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -355,6 +355,10 @@ struct ZSTD_CCtx_params_s { /* Controls block-level sequence compression API */ int useExternalMatchfinder; + + /* Controls whether zstd will fall back to an internal matchfinder + * when the external matchfinder returns a non-zero error code. */ + int enableMatchfinderFallback; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) diff --git a/lib/zstd.h b/lib/zstd.h index 362317ecb71..b2ba02a22c6 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -479,6 +479,7 @@ typedef enum { * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables * ZSTD_c_useExternalMatchfinder + * ZSTD_c_enableMatchfinderFallback * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -499,7 +500,8 @@ typedef enum { ZSTD_c_experimentalParam14=1011, ZSTD_c_experimentalParam15=1012, ZSTD_c_experimentalParam16=1013, - ZSTD_c_experimentalParam17=1014 + ZSTD_c_experimentalParam17=1014, + ZSTD_c_experimentalParam18=1015 } ZSTD_cParameter; typedef struct { @@ -1513,15 +1515,29 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, /* Block-level sequence compression API */ // @nocommit improve docs +typedef enum { + /* External matchfinder successfully compressed the block. */ + ZSTD_emf_error_none = 0, + + /* External matchfinder hit an unspecified failure condition. + * Will fail the overall compression operation. */ + ZSTD_emf_error_generic= 1, +} ZSTD_externalMatchFinder_errorCode_e; + +typedef struct { + size_t nbSeqsFound; + ZSTD_externalMatchFinder_errorCode_e errorCode; +} ZSTD_externalMatchResult; + // @nocommit move bounds comments into docs -typedef size_t ZSTD_externalMatchFinder_F ( - void* externalMatchState, // TODO make this an externalMatchState type +typedef ZSTD_externalMatchResult ZSTD_externalMatchFinder_F ( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH const void* src, size_t srcSize, const void* dict, size_t dictSize - // srcSize - historySize <= 128 KB - // historySize < srcSize ; any size + // srcSize <= 128 KB + // dictSize is not bounded ); typedef void ZSTD_externalMatchStateDestructor_F ( @@ -1536,8 +1552,7 @@ ZSTD_registerExternalMatchFinder( ZSTD_externalMatchStateDestructor_F* externalMatchStateDestructor ); -/* Note: calls the previously-registered ZSTD_externalMatchStateDestructor_F* - * if it is non-NULL. */ +/* Note: calls the previously-registered destructor if it is not NULL. */ ZSTDLIB_STATIC_API void ZSTD_clearExternalMatchFinder( ZSTD_CCtx* cctx @@ -2083,6 +2098,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo // @nocommit document #define ZSTD_c_useExternalMatchfinder ZSTD_c_experimentalParam17 +// @nocommit document +#define ZSTD_c_enableMatchfinderFallback ZSTD_c_experimentalParam18 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. diff --git a/lib/zstd_errors.h b/lib/zstd_errors.h index 880e8e4f6da..4edbfa54b9e 100644 --- a/lib/zstd_errors.h +++ b/lib/zstd_errors.h @@ -92,6 +92,7 @@ typedef enum { ZSTD_error_seekableIO = 102, ZSTD_error_dstBuffer_wrong = 104, ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_externalMatchFinder_failed = 106, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; From a5241dbad8e11e39d38bde6ea3a67d9d1471ee95 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sat, 26 Nov 2022 20:48:49 -0500 Subject: [PATCH 14/60] handle RLE properly for external matchfinder --- lib/compress/zstd_compress.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7b7c47bb64e..fee57ab64f0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3232,12 +3232,18 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { * This is just a heuristic based on the compressibility. * It may return both false positives and false negatives. */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore) +static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* appliedParams) { size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - return nbSeqs < 4 && nbLits < 10; + if (appliedParams->useExternalMatchfinder) { + /* We shouldn't make any assumptions about how an external matchfinder + * will compress an RLE block. */ + return 1; + } else { + return nbSeqs < 4 && nbLits < 10; + } } static void @@ -4087,7 +4093,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_maybeRLE(&zc->seqStore, &zc->appliedParams) && ZSTD_isRLE((BYTE const*)src, srcSize)) { return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); @@ -6441,7 +6447,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_maybeRLE(&cctx->seqStore, &cctx->appliedParams) && ZSTD_isRLE(ip, blockSize)) { /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." From 4defd0ed2b57e4f586dc2296e4721413ae459bde Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sat, 26 Nov 2022 21:32:17 -0500 Subject: [PATCH 15/60] nit --- contrib/externalMatchfinder/main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index a504a956614..b19e59c008a 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -7,8 +7,6 @@ #include "zstd_errors.h" #include "matchfinder.h" // simpleExternalMatchFinder, simpleExternalMatchStateDestructor -const size_t ZSTD_LEVEL = 1; - int main(int argc, char *argv[]) { size_t res; From f9fc1b19efcbe5b6353d9f75d6ec8c965274e387 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 29 Nov 2022 17:31:41 -0500 Subject: [PATCH 16/60] Move to a CDict-like model for resource ownership --- contrib/externalMatchfinder/main.c | 17 +-- contrib/externalMatchfinder/matchfinder.c | 9 +- contrib/externalMatchfinder/matchfinder.h | 4 +- lib/common/error_private.c | 2 +- lib/compress/zstd_compress.c | 135 +++++++--------------- lib/compress/zstd_compress_internal.h | 8 +- lib/zstd.h | 44 ++----- 7 files changed, 58 insertions(+), 161 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index b19e59c008a..2b216485694 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -20,25 +20,12 @@ int main(int argc, char *argv[]) { int simpleExternalMatchState = 0xdeadbeef; // @nocommit // Here is the crucial bit of code! - res = ZSTD_registerExternalMatchFinder( + ZSTD_refExternalMatchFinder( zc, &simpleExternalMatchState, - simpleExternalMatchFinder, - simpleExternalMatchStateDestructor + simpleExternalMatchFinder ); - if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorName(res)); - return 1; - } - - res = ZSTD_CCtx_setParameter(zc, ZSTD_c_useExternalMatchfinder, 1); - - if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorName(res)); - return 1; - } - res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchfinderFallback, 1); if (ZSTD_isError(res)) { diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index c88126f6800..5962e9635d8 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -7,7 +7,7 @@ static U32 const HLOG = 10; static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); -ZSTD_externalMatchResult simpleExternalMatchFinder( +size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ) { @@ -57,10 +57,5 @@ ZSTD_externalMatchResult simpleExternalMatchFinder( }; outSeqs[seqCount] = finalSeq; - ZSTD_externalMatchResult res = {seqCount, ZSTD_emf_error_none}; - return res; -} - -void simpleExternalMatchStateDestructor(void* externalMatchState) { - (void)externalMatchState; + return seqCount; } diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 18a02efdf61..3fc62f893ab 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -4,11 +4,9 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -ZSTD_externalMatchResult simpleExternalMatchFinder( +size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ); -void simpleExternalMatchStateDestructor(void* matchState); - #endif diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 2e81e07a0aa..5aa1e96ee8f 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -51,7 +51,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; - case PREFIX(externalMatchFinder_failed): return "External matchfinder returned a non-zero error code"; + case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index fee57ab64f0..121455d67ff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -173,7 +173,6 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) assert(cctx != NULL); assert(cctx->staticSize == 0); ZSTD_clearAllDicts(cctx); - ZSTD_clearExternalMatchFinder(cctx); #ifdef ZSTD_MULTITHREAD ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; #endif @@ -588,11 +587,6 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; - case ZSTD_c_useExternalMatchfinder: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - case ZSTD_c_enableMatchfinderFallback: bounds.lowerBound = 0; bounds.upperBound = 1; @@ -663,7 +657,6 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_useExternalMatchfinder: case ZSTD_c_enableMatchfinderFallback: default: return 0; @@ -687,14 +680,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) "MT not compatible with static alloc"); break; - case ZSTD_c_useExternalMatchfinder: - RETURN_ERROR_IF( - (value == 1) && (cctx->externalMatchCtx.mFinder == NULL), - parameter_unsupported, - "Must register an external matchfinder to set useExternalMatchfinder=1" - ); - break; - case ZSTD_c_compressionLevel: case ZSTD_c_windowLog: case ZSTD_c_hashLog: @@ -962,11 +947,6 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; return CCtxParams->prefetchCDictTables; - case ZSTD_c_useExternalMatchfinder: - BOUNDCHECK(ZSTD_c_useExternalMatchfinder, value); - CCtxParams->useExternalMatchfinder = value; - return CCtxParams->useExternalMatchfinder; - case ZSTD_c_enableMatchfinderFallback: BOUNDCHECK(ZSTD_c_enableMatchfinderFallback, value); CCtxParams->enableMatchfinderFallback = value; @@ -1107,9 +1087,6 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_prefetchCDictTables: *value = (int)CCtxParams->prefetchCDictTables; break; - case ZSTD_c_useExternalMatchfinder: - *value = CCtxParams->useExternalMatchfinder; - break; case ZSTD_c_enableMatchfinderFallback: *value = CCtxParams->enableMatchfinderFallback; break; @@ -2911,56 +2888,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; -// @nocommit External matchfinder, need to move this at some point -size_t ZSTD_registerExternalMatchFinder( - ZSTD_CCtx* zc, void* mState, - ZSTD_externalMatchFinder_F* mFinder, - ZSTD_externalMatchStateDestructor_F* mStateDestructor -) { - RETURN_ERROR_IF( - zc->staticSize, parameter_unsupported, - "External matchfinder is not compatible with static alloc" - ); - - RETURN_ERROR_IF( - (mState && !mStateDestructor) || (!mState && mStateDestructor), - GENERIC, - "Cannot provide external match state without corresponding destructor, or vice versa!" - ); - - ZSTD_clearExternalMatchFinder(zc); - - size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); - ZSTD_Sequence* seqBuffer = (ZSTD_Sequence*)ZSTD_malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); - if (seqBuffer == NULL) { - return ZSTD_error_memory_allocation; - } - - ZSTD_externalMatchCtx emctx = { - mState, - mFinder, - mStateDestructor, - seqBuffer, - seqBufferCapacity - }; - zc->externalMatchCtx = emctx; - - return ZSTD_error_no_error; -} - -void ZSTD_clearExternalMatchFinder( - ZSTD_CCtx* zc -) { - ZSTD_externalMatchCtx const emctx = zc->externalMatchCtx; - if (emctx.mFinder != NULL) { - if (emctx.mStateDestructor != NULL) { - (emctx.mStateDestructor)(emctx.mState); - } - ZSTD_free(emctx.seqBuffer); - ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx)); - } -} - static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; @@ -3012,14 +2939,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) * We need to revisit soon and implement it. */ if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder == 1, + zc->externalMatchCtx.mFinder != NULL, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Block splitting with external matchfinder enabled is not currently supported. " "Note: block splitting is enabled by default at high compression levels." ); } else { RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder == 1, + zc->externalMatchCtx.mFinder != NULL, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -3039,7 +2966,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* External matchfinder + LDM is technically possible, just not implemented yet. * We need to revisit soon and implement it. */ RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder == 1, + zc->externalMatchCtx.mFinder != NULL, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -3058,22 +2985,26 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (zc->appliedParams.useExternalMatchfinder) { - // @nocommit document lack of dictionary support in function call - RETURN_ERROR_IF( - zc->externalMatchCtx.mFinder == NULL, parameter_unsupported, - "useExternalMatchfinder=1 but no external matchfinder is registered!" + } else if (zc->externalMatchCtx.mFinder != NULL) { + assert( + zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) ); - ZSTD_externalMatchResult matchResult = (zc->externalMatchCtx.mFinder)( - NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 + + size_t nbExternalSeqs = (zc->externalMatchCtx.mFinder)( + zc->externalMatchCtx.mState, + zc->externalMatchCtx.seqBuffer, + zc->externalMatchCtx.seqBufferCapacity, + src, srcSize, + NULL, 0 /* dict and dictSize, currently not supported */ ); - if (matchResult.errorCode == ZSTD_emf_error_none) { + + if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { ZSTD_sequencePosition seqPos = {0,0,0}; - ms->ldmSeqStore = NULL; - lastLLSize = 0; ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, matchResult.nbSeqsFound, src, srcSize + zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbExternalSeqs, src, srcSize ); + ms->ldmSeqStore = NULL; + lastLLSize = 0; } else { if (zc->appliedParams.enableMatchfinderFallback) { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, @@ -3084,11 +3015,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } else { RETURN_ERROR( externalMatchFinder_failed, - "External matchfinder returned a non-zero error code!" + "External matchfinder returned an error code!" ); } } - } else { /* not long range mode */ + } else { /* not long range mode and no external matchfinder */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); @@ -3232,12 +3163,12 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { * This is just a heuristic based on the compressibility. * It may return both false positives and false negatives. */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* appliedParams) +static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_externalMatchCtx const* externalMatchCtx) { size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - if (appliedParams->useExternalMatchfinder) { + if (externalMatchCtx->mFinder != NULL) { /* We shouldn't make any assumptions about how an external matchfinder * will compress an RLE block. */ return 1; @@ -4093,7 +4024,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore, &zc->appliedParams) && + ZSTD_maybeRLE(&zc->seqStore, &zc->externalMatchCtx) && ZSTD_isRLE((BYTE const*)src, srcSize)) { return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); @@ -6447,7 +6378,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore, &cctx->appliedParams) && + ZSTD_maybeRLE(&cctx->seqStore, &cctx->externalMatchCtx) && ZSTD_isRLE(ip, blockSize)) { /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." @@ -6720,3 +6651,21 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); } + +void ZSTD_refExternalMatchFinder( + ZSTD_CCtx* zc, void* mState, + ZSTD_externalMatchFinder_F* mFinder +) { + size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + ZSTD_Sequence* const seqBuffer = malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); + + ZSTD_externalMatchCtx emctx = { + mState, + mFinder, + + /* @nocommit seqBuffer should move into the cwksp */ + seqBuffer, + seqBufferCapacity + }; + zc->externalMatchCtx = emctx; +} diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index d962e2785dc..433a2c9e71e 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -353,11 +353,8 @@ struct ZSTD_CCtx_params_s { /* Controls prefetching in some dictMatchState matchfinders */ ZSTD_paramSwitch_e prefetchCDictTables; - /* Controls block-level sequence compression API */ - int useExternalMatchfinder; - /* Controls whether zstd will fall back to an internal matchfinder - * when the external matchfinder returns a non-zero error code. */ + * when the external matchfinder returns an error code. */ int enableMatchfinderFallback; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ @@ -395,7 +392,6 @@ typedef struct { typedef struct { void* mState; ZSTD_externalMatchFinder_F* mFinder; - ZSTD_externalMatchStateDestructor_F* mStateDestructor; ZSTD_Sequence* seqBuffer; size_t seqBufferCapacity; } ZSTD_externalMatchCtx; @@ -470,7 +466,7 @@ struct ZSTD_CCtx_s { /* Workspace for block splitter */ ZSTD_blockSplitCtx blockSplitCtx; - /* External block matchfinder */ + /* Workspace for external matchfinder */ ZSTD_externalMatchCtx externalMatchCtx; }; diff --git a/lib/zstd.h b/lib/zstd.h index b2ba02a22c6..8085a68067b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -478,7 +478,6 @@ typedef enum { * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables - * ZSTD_c_useExternalMatchfinder * ZSTD_c_enableMatchfinderFallback * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; @@ -500,8 +499,7 @@ typedef enum { ZSTD_c_experimentalParam14=1011, ZSTD_c_experimentalParam15=1012, ZSTD_c_experimentalParam16=1013, - ZSTD_c_experimentalParam17=1014, - ZSTD_c_experimentalParam18=1015 + ZSTD_c_experimentalParam17=1014 } ZSTD_cParameter; typedef struct { @@ -564,7 +562,7 @@ typedef enum { * They will be used to compress next frame. * Resetting session never fails. * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. + * This removes any reference to any dictionary too. @nocommit mention external matchfinder * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) * - Both : similar to resetting the session, followed by resetting parameters. @@ -1515,22 +1513,10 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, /* Block-level sequence compression API */ // @nocommit improve docs -typedef enum { - /* External matchfinder successfully compressed the block. */ - ZSTD_emf_error_none = 0, - - /* External matchfinder hit an unspecified failure condition. - * Will fail the overall compression operation. */ - ZSTD_emf_error_generic= 1, -} ZSTD_externalMatchFinder_errorCode_e; - -typedef struct { - size_t nbSeqsFound; - ZSTD_externalMatchFinder_errorCode_e errorCode; -} ZSTD_externalMatchResult; +#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) // @nocommit move bounds comments into docs -typedef ZSTD_externalMatchResult ZSTD_externalMatchFinder_F ( +typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH @@ -1540,22 +1526,11 @@ typedef ZSTD_externalMatchResult ZSTD_externalMatchFinder_F ( // dictSize is not bounded ); -typedef void ZSTD_externalMatchStateDestructor_F ( - void* externalMatchState -); - -ZSTDLIB_STATIC_API size_t -ZSTD_registerExternalMatchFinder( +ZSTDLIB_STATIC_API void +ZSTD_refExternalMatchFinder( ZSTD_CCtx* cctx, void* externalMatchState, - ZSTD_externalMatchFinder_F* externalMatchFinder, - ZSTD_externalMatchStateDestructor_F* externalMatchStateDestructor -); - -/* Note: calls the previously-registered destructor if it is not NULL. */ -ZSTDLIB_STATIC_API void -ZSTD_clearExternalMatchFinder( - ZSTD_CCtx* cctx + ZSTD_externalMatchFinder_F* externalMatchFinder ); /****************************************/ @@ -2096,10 +2071,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 // @nocommit document -#define ZSTD_c_useExternalMatchfinder ZSTD_c_experimentalParam17 - -// @nocommit document -#define ZSTD_c_enableMatchfinderFallback ZSTD_c_experimentalParam18 +#define ZSTD_c_enableMatchfinderFallback ZSTD_c_experimentalParam17 /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, From c126ecd2310513e6f810716dffdf2f4fe289ee42 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 30 Nov 2022 17:07:37 -0500 Subject: [PATCH 17/60] Add hidden useExternalMatchfinder bool to CCtx_params_s --- lib/compress/zstd_compress.c | 20 ++++++++++++-------- lib/compress/zstd_compress_internal.h | 7 ++++++- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 121455d67ff..a9a30e73a61 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2939,14 +2939,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) * We need to revisit soon and implement it. */ if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { RETURN_ERROR_IF( - zc->externalMatchCtx.mFinder != NULL, + zc->appliedParams.useExternalMatchfinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Block splitting with external matchfinder enabled is not currently supported. " "Note: block splitting is enabled by default at high compression levels." ); } else { RETURN_ERROR_IF( - zc->externalMatchCtx.mFinder != NULL, + zc->appliedParams.useExternalMatchfinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -2966,7 +2966,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* External matchfinder + LDM is technically possible, just not implemented yet. * We need to revisit soon and implement it. */ RETURN_ERROR_IF( - zc->externalMatchCtx.mFinder != NULL, + zc->appliedParams.useExternalMatchfinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -2985,10 +2985,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (zc->externalMatchCtx.mFinder != NULL) { + } else if (zc->appliedParams.useExternalMatchfinder) { assert( zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) ); + assert(zc->externalMatchCtx.mFinder != NULL); size_t nbExternalSeqs = (zc->externalMatchCtx.mFinder)( zc->externalMatchCtx.mState, @@ -3020,6 +3021,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } } } else { /* not long range mode and no external matchfinder */ + assert(zc->externalMatchCtx.mFinder == NULL); ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); @@ -3163,12 +3165,12 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { * This is just a heuristic based on the compressibility. * It may return both false positives and false negatives. */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_externalMatchCtx const* externalMatchCtx) +static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* appliedParams) { size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - if (externalMatchCtx->mFinder != NULL) { + if (appliedParams->useExternalMatchfinder) { /* We shouldn't make any assumptions about how an external matchfinder * will compress an RLE block. */ return 1; @@ -4024,7 +4026,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore, &zc->externalMatchCtx) && + ZSTD_maybeRLE(&zc->seqStore, &zc->appliedParams) && ZSTD_isRLE((BYTE const*)src, srcSize)) { return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); @@ -6378,7 +6380,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore, &cctx->externalMatchCtx) && + ZSTD_maybeRLE(&cctx->seqStore, &cctx->appliedParams) && ZSTD_isRLE(ip, blockSize)) { /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." @@ -6659,6 +6661,8 @@ void ZSTD_refExternalMatchFinder( size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); ZSTD_Sequence* const seqBuffer = malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); + zc->requestedParams.useExternalMatchfinder = 1; + ZSTD_externalMatchCtx emctx = { mState, mFinder, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 433a2c9e71e..5fc25f65f6c 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -354,8 +354,13 @@ struct ZSTD_CCtx_params_s { ZSTD_paramSwitch_e prefetchCDictTables; /* Controls whether zstd will fall back to an internal matchfinder - * when the external matchfinder returns an error code. */ + * if the external matchfinder returns an error code. */ int enableMatchfinderFallback; + + /* Indicates whether an external matchfinder has been referenced. + * Users can't set this externally. + * It is set internally in ZSTD_refExternalMatchfinder(). */ + int useExternalMatchfinder; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) From aeb060df1f21a2ae60b95f0ed67399d887a15903 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 30 Nov 2022 17:42:28 -0500 Subject: [PATCH 18/60] Eliminate malloc, move to cwksp allocation --- lib/compress/zstd_compress.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a9a30e73a61..44d060d9262 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -28,9 +28,6 @@ #include "zstd_ldm.h" #include "zstd_compress_superblock.h" #include "../common/bits.h" /* ZSTD_highbit32 */ -#define ZSTD_DEPS_NEED_MALLOC -#include "../common/zstd_deps.h" /* ZSTD_malloc */ -#include /* *************************************************************** * Tuning parameters @@ -1510,7 +1507,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_paramSwitch_e useRowMatchFinder, const size_t buffInSize, const size_t buffOutSize, - const U64 pledgedSrcSize) + const U64 pledgedSrcSize, + int useExternalMatchfinder) { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); @@ -1534,6 +1532,9 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + size_t const externalSeqSpace = useExternalMatchfinder ? + ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence) : 0; + size_t const neededSpace = cctxSpace + entropySpace + @@ -1542,7 +1543,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( ldmSeqSpace + matchStateSize + tokenSpace + - bufferSpace; + bufferSpace + + externalSeqSpace; DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); return neededSpace; @@ -1560,7 +1562,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. */ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchfinder); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) @@ -1621,7 +1623,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) return ZSTD_estimateCCtxSize_usingCCtxParams_internal( &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, - ZSTD_CONTENTSIZE_UNKNOWN); + ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchfinder); } } @@ -1922,7 +1924,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, - buffInSize, buffOutSize, pledgedSrcSize); + buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchfinder); int resizeWorkspace; FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); @@ -2035,6 +2037,14 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->ldmState.loadedDictEnd = 0; } + /* reserve space for block-level external sequences */ + if (params->useExternalMatchfinder) { + size_t const maxNbExternalSeq = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq; + zc->externalMatchCtx.seqBuffer = + (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); + } + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); @@ -6658,18 +6668,14 @@ void ZSTD_refExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder ) { - size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); - ZSTD_Sequence* const seqBuffer = malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); - zc->requestedParams.useExternalMatchfinder = 1; - ZSTD_externalMatchCtx emctx = { mState, mFinder, - /* @nocommit seqBuffer should move into the cwksp */ - seqBuffer, - seqBufferCapacity + /* seqBuffer is allocated later (from the cwskp) */ + NULL, /* seqBuffer */ + 0 /* seqBufferCapacity */ }; zc->externalMatchCtx = emctx; } From 349c36e8aecd7e532e38d045c9dd174b8a1e4393 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 30 Nov 2022 18:12:08 -0500 Subject: [PATCH 19/60] Handle CCtx reset properly --- lib/compress/zstd_compress.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 44d060d9262..da04185b4ce 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -158,6 +158,13 @@ static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) cctx->cdict = NULL; } +/* Clears the external matchfinder, if one has been referenced. + * Does not free any resources owned by the external match state! */ +static void ZSTD_clearExternalMatchCtx(ZSTD_CCtx* cctx) { + ZSTD_externalMatchCtx nullCtx = {0}; + cctx->externalMatchCtx = nullCtx; +} + static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) { size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; @@ -1258,6 +1265,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, "Can't reset parameters only when not in init stage."); ZSTD_clearAllDicts(cctx); + ZSTD_clearExternalMatchCtx(cctx); return ZSTD_CCtxParams_reset(&cctx->requestedParams); } return 0; From 80ec8aa1cc32cbb56f8e1c280f75875962de7e5b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 30 Nov 2022 18:23:45 -0500 Subject: [PATCH 20/60] Ensure seqStore has enough space for external sequences --- lib/compress/zstd_compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index da04185b4ce..ad80765191d 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1520,7 +1520,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (cParams->minMatch==3) ? 3 : 4; + U32 const divider = (cParams->minMatch==3 || useExternalMatchfinder) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) @@ -1914,7 +1914,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; + U32 const divider = (params->cParams.minMatch==3 || params->useExternalMatchfinder) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 From bf69b2cc21a5c83fdc4aa314753ead64e2c5d31a Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 13:42:31 -0500 Subject: [PATCH 21/60] fix capitalization --- contrib/externalMatchfinder/main.c | 2 +- lib/compress/zstd_compress.c | 48 +++++++++++++-------------- lib/compress/zstd_compress_internal.h | 6 ++-- lib/zstd.h | 4 +-- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 2b216485694..60966a9eb4d 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -26,7 +26,7 @@ int main(int argc, char *argv[]) { simpleExternalMatchFinder ); - res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchfinderFallback, 1); + res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1); if (ZSTD_isError(res)) { printf("ERROR: %s\n", ZSTD_getErrorName(res)); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ad80765191d..07356a1f629 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -591,7 +591,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; - case ZSTD_c_enableMatchfinderFallback: + case ZSTD_c_enableMatchFinderFallback: bounds.lowerBound = 0; bounds.upperBound = 1; return bounds; @@ -661,7 +661,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_enableMatchfinderFallback: + case ZSTD_c_enableMatchFinderFallback: default: return 0; } @@ -718,7 +718,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_enableMatchfinderFallback: + case ZSTD_c_enableMatchFinderFallback: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -951,10 +951,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; return CCtxParams->prefetchCDictTables; - case ZSTD_c_enableMatchfinderFallback: - BOUNDCHECK(ZSTD_c_enableMatchfinderFallback, value); - CCtxParams->enableMatchfinderFallback = value; - return CCtxParams->enableMatchfinderFallback; + case ZSTD_c_enableMatchFinderFallback: + BOUNDCHECK(ZSTD_c_enableMatchFinderFallback, value); + CCtxParams->enableMatchFinderFallback = value; + return CCtxParams->enableMatchFinderFallback; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } @@ -1091,8 +1091,8 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_prefetchCDictTables: *value = (int)CCtxParams->prefetchCDictTables; break; - case ZSTD_c_enableMatchfinderFallback: - *value = CCtxParams->enableMatchfinderFallback; + case ZSTD_c_enableMatchFinderFallback: + *value = CCtxParams->enableMatchFinderFallback; break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } @@ -1516,11 +1516,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const size_t buffInSize, const size_t buffOutSize, const U64 pledgedSrcSize, - int useExternalMatchfinder) + int useExternalMatchFinder) { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (cParams->minMatch==3 || useExternalMatchfinder) ? 3 : 4; + U32 const divider = (cParams->minMatch==3 || useExternalMatchFinder) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) @@ -1540,7 +1540,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const externalSeqSpace = useExternalMatchfinder ? + size_t const externalSeqSpace = useExternalMatchFinder ? ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence) : 0; size_t const neededSpace = @@ -1570,7 +1570,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. */ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchfinder); + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) @@ -1631,7 +1631,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) return ZSTD_estimateCCtxSize_usingCCtxParams_internal( &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, - ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchfinder); + ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder); } } @@ -1914,7 +1914,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params->cParams.minMatch==3 || params->useExternalMatchfinder) ? 3 : 4; + U32 const divider = (params->cParams.minMatch==3 || params->useExternalMatchFinder) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 @@ -1932,7 +1932,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, - buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchfinder); + buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder); int resizeWorkspace; FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); @@ -2046,7 +2046,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, } /* reserve space for block-level external sequences */ - if (params->useExternalMatchfinder) { + if (params->useExternalMatchFinder) { size_t const maxNbExternalSeq = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq; zc->externalMatchCtx.seqBuffer = @@ -2957,14 +2957,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) * We need to revisit soon and implement it. */ if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder, + zc->appliedParams.useExternalMatchFinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Block splitting with external matchfinder enabled is not currently supported. " "Note: block splitting is enabled by default at high compression levels." ); } else { RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder, + zc->appliedParams.useExternalMatchFinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -2984,7 +2984,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* External matchfinder + LDM is technically possible, just not implemented yet. * We need to revisit soon and implement it. */ RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder, + zc->appliedParams.useExternalMatchFinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -3003,7 +3003,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (zc->appliedParams.useExternalMatchfinder) { + } else if (zc->appliedParams.useExternalMatchFinder) { assert( zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) ); @@ -3025,7 +3025,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ms->ldmSeqStore = NULL; lastLLSize = 0; } else { - if (zc->appliedParams.enableMatchfinderFallback) { + if (zc->appliedParams.enableMatchFinderFallback) { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); @@ -3188,7 +3188,7 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* app size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - if (appliedParams->useExternalMatchfinder) { + if (appliedParams->useExternalMatchFinder) { /* We shouldn't make any assumptions about how an external matchfinder * will compress an RLE block. */ return 1; @@ -6676,7 +6676,7 @@ void ZSTD_refExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder ) { - zc->requestedParams.useExternalMatchfinder = 1; + zc->requestedParams.useExternalMatchFinder = 1; ZSTD_externalMatchCtx emctx = { mState, mFinder, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 5fc25f65f6c..b54d50b17c1 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -355,12 +355,12 @@ struct ZSTD_CCtx_params_s { /* Controls whether zstd will fall back to an internal matchfinder * if the external matchfinder returns an error code. */ - int enableMatchfinderFallback; + int enableMatchFinderFallback; /* Indicates whether an external matchfinder has been referenced. * Users can't set this externally. - * It is set internally in ZSTD_refExternalMatchfinder(). */ - int useExternalMatchfinder; + * It is set internally in ZSTD_refExternalMatchFinder(). */ + int useExternalMatchFinder; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) diff --git a/lib/zstd.h b/lib/zstd.h index 8085a68067b..49ed8f004a3 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -478,7 +478,7 @@ typedef enum { * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables - * ZSTD_c_enableMatchfinderFallback + * ZSTD_c_enableMatchFinderFallback * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -2071,7 +2071,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 // @nocommit document -#define ZSTD_c_enableMatchfinderFallback ZSTD_c_experimentalParam17 +#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17 /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, From 5c4891f38479e0460b7fa13172ea7a95b80f086d Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 14:03:19 -0500 Subject: [PATCH 22/60] Add DEBUGLOG statements --- lib/compress/zstd_compress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 07356a1f629..fbe0586855f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3024,12 +3024,18 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ); ms->ldmSeqStore = NULL; lastLLSize = 0; + DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); } else { if (zc->appliedParams.enableMatchFinderFallback) { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); ms->ldmSeqStore = NULL; + DEBUGLOG( + 5, + "External matchfinder returned error code %lu. Falling back to internal matchfinder.", + (unsigned long)nbExternalSeqs + ); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } else { RETURN_ERROR( From 72e50fa3b563ed2cc5edca5b6891ce441de36ee2 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 14:18:09 -0500 Subject: [PATCH 23/60] Add compressionLevel param to matchfinder API --- contrib/externalMatchfinder/matchfinder.c | 7 +++++-- contrib/externalMatchfinder/matchfinder.h | 4 +++- lib/compress/zstd_compress.c | 3 ++- lib/zstd.h | 3 ++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 5962e9635d8..35914fb69bb 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -9,12 +9,15 @@ static U32 const BADIDX = (1 << 31); size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, const void* dict, size_t dictSize + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel ) { (void)externalMatchState; (void)dict; (void)dictSize; - (void)outSeqsCapacity; // @nocommit return an error + (void)outSeqsCapacity; + (void)compressionLevel; const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 3fc62f893ab..1d52531fc11 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -6,7 +6,9 @@ size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, const void* dict, size_t dictSize + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel ); #endif diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index fbe0586855f..7649c577ae3 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3014,7 +3014,8 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, - NULL, 0 /* dict and dictSize, currently not supported */ + NULL, 0, /* dict and dictSize, currently not supported */ + zc->appliedParams.compressionLevel ); if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { diff --git a/lib/zstd.h b/lib/zstd.h index 49ed8f004a3..26656865e55 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1521,7 +1521,8 @@ typedef size_t ZSTD_externalMatchFinder_F ( ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH const void* src, size_t srcSize, - const void* dict, size_t dictSize + const void* dict, size_t dictSize, + int compressionLevel // srcSize <= 128 KB // dictSize is not bounded ); From 56e66332d942fa6afe797f630ce9e35839585f25 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 14:40:56 -0500 Subject: [PATCH 24/60] fix c99 issues and add a param combination error code --- lib/common/error_private.c | 1 + lib/compress/zstd_compress.c | 76 +++++++++++++-------------- lib/compress/zstd_compress_internal.h | 3 +- lib/zstd.h | 12 ++--- lib/zstd_errors.h | 1 + 5 files changed, 47 insertions(+), 46 deletions(-) diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 5aa1e96ee8f..fb4d7059621 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -31,6 +31,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification"; case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters"; case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; case PREFIX(init_missing): return "Context should be init first"; case PREFIX(memory_allocation): return "Allocation error : not enough memory"; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7649c577ae3..e9a62c145d1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2958,14 +2958,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { RETURN_ERROR_IF( zc->appliedParams.useExternalMatchFinder, - parameter_unsupported, // @nocommit Make this a parameter *combination* error? + parameter_combination_unsupported, "Block splitting with external matchfinder enabled is not currently supported. " "Note: block splitting is enabled by default at high compression levels." ); } else { RETURN_ERROR_IF( zc->appliedParams.useExternalMatchFinder, - parameter_unsupported, // @nocommit Make this a parameter *combination* error? + parameter_combination_unsupported, "Long-distance matching with external matchfinder enabled is not currently supported." ); } @@ -2985,7 +2985,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) * We need to revisit soon and implement it. */ RETURN_ERROR_IF( zc->appliedParams.useExternalMatchFinder, - parameter_unsupported, // @nocommit Make this a parameter *combination* error? + parameter_combination_unsupported, "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -3009,47 +3009,47 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ); assert(zc->externalMatchCtx.mFinder != NULL); - size_t nbExternalSeqs = (zc->externalMatchCtx.mFinder)( - zc->externalMatchCtx.mState, - zc->externalMatchCtx.seqBuffer, - zc->externalMatchCtx.seqBufferCapacity, - src, srcSize, - NULL, 0, /* dict and dictSize, currently not supported */ - zc->appliedParams.compressionLevel - ); - - if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { - ZSTD_sequencePosition seqPos = {0,0,0}; - ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbExternalSeqs, src, srcSize + { size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)( + zc->externalMatchCtx.mState, + zc->externalMatchCtx.seqBuffer, + zc->externalMatchCtx.seqBufferCapacity, + src, srcSize, + NULL, 0, /* dict and dictSize, currently not supported */ + zc->appliedParams.compressionLevel ); - ms->ldmSeqStore = NULL; - lastLLSize = 0; - DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); - } else { - if (zc->appliedParams.enableMatchFinderFallback) { - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, - zc->appliedParams.useRowMatchFinder, - dictMode); - ms->ldmSeqStore = NULL; - DEBUGLOG( - 5, - "External matchfinder returned error code %lu. Falling back to internal matchfinder.", - (unsigned long)nbExternalSeqs + + if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { + ZSTD_sequencePosition seqPos = {0,0,0}; + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbExternalSeqs, src, srcSize ); - lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + ms->ldmSeqStore = NULL; + lastLLSize = 0; + DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); } else { - RETURN_ERROR( - externalMatchFinder_failed, - "External matchfinder returned an error code!" - ); - } - } + if (zc->appliedParams.enableMatchFinderFallback) { + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + DEBUGLOG( + 5, + "External matchfinder returned error code %lu. Falling back to internal matchfinder.", + (unsigned long)nbExternalSeqs + ); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } else { + RETURN_ERROR( + externalMatchFinder_failed, + "External matchfinder returned an error code!" + ); + } + } } } else { /* not long range mode and no external matchfinder */ - assert(zc->externalMatchCtx.mFinder == NULL); ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); + assert(zc->externalMatchCtx.mFinder == NULL); ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } @@ -6683,7 +6683,6 @@ void ZSTD_refExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder ) { - zc->requestedParams.useExternalMatchFinder = 1; ZSTD_externalMatchCtx emctx = { mState, mFinder, @@ -6693,4 +6692,5 @@ void ZSTD_refExternalMatchFinder( 0 /* seqBufferCapacity */ }; zc->externalMatchCtx = emctx; + zc->requestedParams.useExternalMatchFinder = 1; } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index b54d50b17c1..a119d544fa1 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -392,8 +392,7 @@ typedef struct { ZSTD_entropyCTablesMetadata_t entropyMetadata; } ZSTD_blockSplitCtx; -/* Block-level sequence compression API */ -// @nocommit improve docs +/* Context for block-level external matchfinder API */ typedef struct { void* mState; ZSTD_externalMatchFinder_F* mFinder; diff --git a/lib/zstd.h b/lib/zstd.h index 26656865e55..0f1e17456c4 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1511,20 +1511,20 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const void* src, size_t srcSize); /* Block-level sequence compression API */ -// @nocommit improve docs +/* @nocommit improve docs */ #define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) -// @nocommit move bounds comments into docs +/* @nocommit move bounds comments into docs: + * outSeqsCapacity >= blockSize / MINMATCH + * srcSize <= 128 KB + * dictSize is not bounded */ typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - // outSeqsCapacity >= blockSize / MINMATCH const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel - // srcSize <= 128 KB - // dictSize is not bounded ); ZSTDLIB_STATIC_API void @@ -2071,7 +2071,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 -// @nocommit document +/* @nocommit document */ #define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17 /*! ZSTD_CCtx_getParameter() : diff --git a/lib/zstd_errors.h b/lib/zstd_errors.h index 4edbfa54b9e..bd6dbee5ff8 100644 --- a/lib/zstd_errors.h +++ b/lib/zstd_errors.h @@ -75,6 +75,7 @@ typedef enum { ZSTD_error_dictionary_wrong = 32, ZSTD_error_dictionaryCreation_failed = 34, ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_combination_unsupported = 41, ZSTD_error_parameter_outOfBound = 42, ZSTD_error_tableLog_tooLarge = 44, ZSTD_error_maxSymbolValue_tooLarge = 46, From da90ae83c2bd3a1e3055ebba85a8235c63d1fd9f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 15:21:52 -0500 Subject: [PATCH 25/60] nits --- lib/compress/zstd_compress.c | 10 +--------- lib/zstd.h | 1 - 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e9a62c145d1..332f8c352d6 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -158,13 +158,6 @@ static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) cctx->cdict = NULL; } -/* Clears the external matchfinder, if one has been referenced. - * Does not free any resources owned by the external match state! */ -static void ZSTD_clearExternalMatchCtx(ZSTD_CCtx* cctx) { - ZSTD_externalMatchCtx nullCtx = {0}; - cctx->externalMatchCtx = nullCtx; -} - static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) { size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; @@ -1265,7 +1258,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, "Can't reset parameters only when not in init stage."); ZSTD_clearAllDicts(cctx); - ZSTD_clearExternalMatchCtx(cctx); + ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx)); return ZSTD_CCtxParams_reset(&cctx->requestedParams); } return 0; @@ -2913,7 +2906,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(srcSize <= ZSTD_BLOCKSIZE_MAX); /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding * additional 1. We need to revisit and change this logic to be more consistent */ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { diff --git a/lib/zstd.h b/lib/zstd.h index 0f1e17456c4..454237da48d 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -2706,7 +2706,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ - #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ #if defined (__cplusplus) From 4d12960f565e46388969ceb6e2bda3664c492243 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 4 Dec 2022 19:24:50 -0500 Subject: [PATCH 26/60] Test external matchfinder API --- contrib/externalMatchfinder/matchfinder.c | 5 +- lib/compress/zstd_compress.c | 59 ++++++++++++-- tests/Makefile | 2 +- tests/external_matchfinder.c | 99 +++++++++++++++++++++++ tests/external_matchfinder.h | 23 ++++++ tests/zstreamtest.c | 84 ++++++++++++++++++- 6 files changed, 261 insertions(+), 11 deletions(-) create mode 100644 tests/external_matchfinder.c create mode 100644 tests/external_matchfinder.h diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 35914fb69bb..bc765c8bd69 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -1,6 +1,5 @@ #include "zstd_compress_internal.h" #include "matchfinder.h" -#include #define HSIZE 1024 static U32 const HLOG = 10; @@ -58,7 +57,7 @@ size_t simpleExternalMatchFinder( ZSTD_Sequence const finalSeq = { 0, iend - anchor, 0, 0 }; - outSeqs[seqCount] = finalSeq; + outSeqs[seqCount++] = finalSeq; - return seqCount; + return seqCount; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 332f8c352d6..091654983ce 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2897,6 +2897,49 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthType = ZSTD_llt_none; } +/* ZSTD_postProcessExternalMatchFinderResult() : + * Validates and post-processes sequences obtained through the external matchfinder API. + * Returns the number of sequences after post-processing, or an error code. */ +static size_t ZSTD_postProcessExternalMatchFinderResult( + ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, int emptySrc +) { + RETURN_ERROR_IF( + nbExternalSeqs > outSeqsCapacity, + externalMatchFinder_failed, + "External matchfinder returned error code %lu", + (unsigned long)nbExternalSeqs + ); + + RETURN_ERROR_IF( + nbExternalSeqs == 0 && !emptySrc, + externalMatchFinder_failed, + "External matchfinder produced zero sequences for a non-empty src buffer!" + ); + + if (emptySrc) { + ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); + return 1; + } else { + ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; + + /* Check if lastSeq is a block delimiter, append one if not */ + if (lastSeq.offset == 0 && lastSeq.matchLength == 0) { + return nbExternalSeqs; + } else { + /* This error condition is only possible if the external matchfinder + * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ + RETURN_ERROR_IF( + nbExternalSeqs == outSeqsCapacity, + externalMatchFinder_failed, + "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" + ); + + ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); + return nbExternalSeqs + 1; + } + } +} + typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -3010,10 +3053,17 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.compressionLevel ); - if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { + size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult( + zc->externalMatchCtx.seqBuffer, + nbExternalSeqs, + zc->externalMatchCtx.seqBufferCapacity, + srcSize == 0 /* emptySrc */ + ); + + if (!ZSTD_isError(nbPostProcessedSeqs)) { ZSTD_sequencePosition seqPos = {0,0,0}; ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbExternalSeqs, src, srcSize + zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize ); ms->ldmSeqStore = NULL; lastLLSize = 0; @@ -3031,10 +3081,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } else { - RETURN_ERROR( - externalMatchFinder_failed, - "External matchfinder returned an error code!" - ); + return nbPostProcessedSeqs; /* return an error */ } } } } else { /* not long range mode and no external matchfinder */ diff --git a/tests/Makefile b/tests/Makefile index baf616acb34..8226176cc86 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -169,7 +169,7 @@ fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PR $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) CLEAN += zstreamtest zstreamtest32 -ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c +ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c external_matchfinder.c ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES) ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES) zstreamtest32 : CFLAGS += -m32 diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c new file mode 100644 index 00000000000..8938c8db3f9 --- /dev/null +++ b/tests/external_matchfinder.c @@ -0,0 +1,99 @@ +#include "external_matchfinder.h" +#include +#include "zstd_compress_internal.h" +#include + +#define HSIZE 1024 +static U32 const HLOG = 10; +static U32 const MLS = 4; +static U32 const BADIDX = (1 << 31); + +static size_t simpleExternalMatchFinder( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel +) { + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + const BYTE* anchor = istart; + size_t seqCount = 0; + U32 hashTable[HSIZE]; + + (void)externalMatchState; + (void)dict; + (void)dictSize; + (void)outSeqsCapacity; + (void)compressionLevel; + + for (int i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } + + while (ip + 4 < iend) { + size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); + U32 const matchIndex = hashTable[hash]; + hashTable[hash] = ip - istart; + + if (matchIndex != BADIDX) { + const BYTE* const match = istart + matchIndex; + size_t const matchLen = ZSTD_count(ip, match, iend); + if (matchLen >= ZSTD_MINMATCH_MIN) { + U32 const litLen = ip - anchor; + U32 const offset = ip - match; + ZSTD_Sequence const seq = { + offset, litLen, matchLen, 0 + }; + outSeqs[seqCount++] = seq; + ip += matchLen; + anchor = ip; + continue; + } + } + + ip++; + } + + { ZSTD_Sequence const finalSeq = { + 0, iend - anchor, 0, 0 + }; + outSeqs[seqCount++] = finalSeq; + } + + return seqCount; +} + +size_t zstreamExternalMatchFinder( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel +) { + EMF_testCase testCase = *((EMF_testCase*)externalMatchState); + memset(outSeqs, 0, outSeqsCapacity); + + switch (testCase) { + case EMF_ZERO_SEQS: + return 0; + case EMF_ONE_BIG_SEQ: + outSeqs[0].offset = 0; + outSeqs[0].matchLength = 0; + outSeqs[0].litLength = srcSize; + return 1; + case EMF_LOTS_OF_SEQS: + return simpleExternalMatchFinder( + externalMatchState, + outSeqs, outSeqsCapacity, + src, srcSize, + dict, dictSize, + compressionLevel + ); + case EMF_SMALL_ERROR: + return outSeqsCapacity + 1; + case EMF_BIG_ERROR: + default: + return ZSTD_EXTERNAL_MATCHFINDER_ERROR; + } +} diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h new file mode 100644 index 00000000000..279aa7ab53a --- /dev/null +++ b/tests/external_matchfinder.h @@ -0,0 +1,23 @@ +#ifndef EXTERNAL_MATCHFINDER +#define EXTERNAL_MATCHFINDER + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +/* See external_matchfinder.c for details on each test case */ +typedef enum { + EMF_ZERO_SEQS = 0, + EMF_ONE_BIG_SEQ = 1, + EMF_LOTS_OF_SEQS = 2, + EMF_BIG_ERROR = 3, + EMF_SMALL_ERROR = 4 +} EMF_testCase; + +size_t zstreamExternalMatchFinder( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel +); + +#endif // EXTERNAL_MATCHFINDER diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 30c5215ae65..6fd35a0fd76 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -39,7 +39,7 @@ #include "seqgen.h" #include "util.h" #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ - +#include "external_matchfinder.h" /* zstreamExternalMatchFinder */ /*-************************************ * Constants @@ -1834,6 +1834,88 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++); + { + size_t const dstBufSize = ZSTD_compressBound(CNBufferSize); + BYTE* dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize)); + size_t const checkBufSize = CNBufferSize; + BYTE* checkBuf = (BYTE*)malloc(checkBufSize); + int enableFallback; + size_t res; + EMF_testCase externalMatchState; + + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + + /* Reference external matchfinder outside the test loop to + * check that the reference is preserved across compressions */ + ZSTD_refExternalMatchFinder( + zc, + &externalMatchState, + zstreamExternalMatchFinder + ); + + for (enableFallback = 0; enableFallback < 1; enableFallback++) { + size_t testCaseId; + + EMF_testCase const EMF_successCases[] = { + EMF_ONE_BIG_SEQ, + EMF_LOTS_OF_SEQS, + }; + size_t const EMF_numSuccessCases = 2; + + EMF_testCase const EMF_failureCases[] = { + EMF_ZERO_SEQS, + EMF_BIG_ERROR, + EMF_SMALL_ERROR, + }; + size_t const EMF_numFailureCases = 3; + + /* Test external matchfinder success scenarios */ + for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) { + externalMatchState = EMF_successCases[testCaseId]; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); + CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); + CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res)); + CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!"); + } + + /* Test external matchfinder failure scenarios */ + for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) { + externalMatchState = EMF_failureCases[testCaseId]; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); + if (enableFallback) { + CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res)); + CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!"); + } else { + CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!"); + CHECK( + ZSTD_getErrorCode(res) != ZSTD_error_externalMatchFinder_failed, + "EMF: Wrong error code: %s", ZSTD_getErrorName(res) + ); + } + } + + /* Test compression with external matchfinder + empty src buffer */ + externalMatchState = EMF_ZERO_SEQS; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0); + CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); + CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!"); + } + + /* Test that reset clears the external matchfinder */ + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); + CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); + } + DISPLAYLEVEL(3, "OK \n"); + _end: FUZ_freeDictionary(dictionary); ZSTD_freeCStream(zc); From a96847211f60714af1165bfb03ba9b6970a5aa3e Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 6 Dec 2022 02:41:27 -0500 Subject: [PATCH 27/60] C90 compat for simpleExternalMatchFinder --- contrib/externalMatchfinder/matchfinder.c | 34 ++++++++++++----------- tests/external_matchfinder.c | 10 ++++--- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index bc765c8bd69..55f686b0a8f 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -7,28 +7,29 @@ static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); size_t simpleExternalMatchFinder( - void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel ) { - (void)externalMatchState; - (void)dict; - (void)dictSize; - (void)outSeqsCapacity; - (void)compressionLevel; - const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; const BYTE* anchor = istart; - size_t seqCount = 0; - U32 hashTable[HSIZE]; - for (int i=0; i < HSIZE; i++) { - hashTable[i] = BADIDX; - } + + (void)externalMatchState; + (void)dict; + (void)dictSize; + (void)outSeqsCapacity; + (void)compressionLevel; + + { int i; + for (i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } } while (ip + 4 < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); @@ -54,10 +55,11 @@ size_t simpleExternalMatchFinder( ip++; } - ZSTD_Sequence const finalSeq = { - 0, iend - anchor, 0, 0 - }; - outSeqs[seqCount++] = finalSeq; + { ZSTD_Sequence const finalSeq = { + 0, iend - anchor, 0, 0 + }; + outSeqs[seqCount++] = finalSeq; + } return seqCount; } diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 8938c8db3f9..2c43a0127c2 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -9,7 +9,8 @@ static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); static size_t simpleExternalMatchFinder( - void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel @@ -27,9 +28,10 @@ static size_t simpleExternalMatchFinder( (void)outSeqsCapacity; (void)compressionLevel; - for (int i=0; i < HSIZE; i++) { - hashTable[i] = BADIDX; - } + { int i; + for (i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } } while (ip + 4 < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); From 80765c6572818993fda8e1b3505f3202fc5b956f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 6 Dec 2022 13:23:38 -0500 Subject: [PATCH 28/60] Fix some @nocommits and an ASAN bug --- contrib/externalMatchfinder/main.c | 2 +- lib/compress/zstd_compress.c | 6 ++++-- lib/zstd.h | 12 ++++++------ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 60966a9eb4d..7d84123ddd8 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -17,7 +17,7 @@ int main(int argc, char *argv[]) { ZSTD_CCtx* zc = ZSTD_createCCtx(); - int simpleExternalMatchState = 0xdeadbeef; // @nocommit + int simpleExternalMatchState = 0xdeadbeef; // Here is the crucial bit of code! ZSTD_refExternalMatchFinder( diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 091654983ce..a8b58747f54 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1533,8 +1533,10 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const externalSeqSpace = useExternalMatchFinder ? - ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence) : 0; + size_t const maxNbExternalSeq = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + size_t const externalSeqSpace = useExternalMatchFinder + ? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) + : 0; size_t const neededSpace = cctxSpace + diff --git a/lib/zstd.h b/lib/zstd.h index 454237da48d..5e87aff3f5c 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -562,7 +562,7 @@ typedef enum { * They will be used to compress next frame. * Resetting session never fails. * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. @nocommit mention external matchfinder + * This also removes any reference to any dictionary or external matchfinder. * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) * - Both : similar to resetting the session, followed by resetting parameters. @@ -1511,14 +1511,14 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const void* src, size_t srcSize); /* Block-level sequence compression API */ -/* @nocommit improve docs */ +/* @nocommit document */ #define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) -/* @nocommit move bounds comments into docs: - * outSeqsCapacity >= blockSize / MINMATCH - * srcSize <= 128 KB - * dictSize is not bounded */ +/* @nocommit document these constraints: + * - outSeqsCapacity >= (blockSize / MINMATCH) + 1 + * - srcSize <= 128 KB + * - dictSize is not bounded */ typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, From 15d909af5e1f0c5793f0c9c071aeaa0a5c55f922 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 11:13:02 -0500 Subject: [PATCH 29/60] nit --- contrib/externalMatchfinder/matchfinder.c | 10 +++++----- tests/external_matchfinder.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 55f686b0a8f..7fabc1a4a1b 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -34,14 +34,14 @@ size_t simpleExternalMatchFinder( while (ip + 4 < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); U32 const matchIndex = hashTable[hash]; - hashTable[hash] = ip - istart; + hashTable[hash] = (U32)(ip - istart); if (matchIndex != BADIDX) { const BYTE* const match = istart + matchIndex; - size_t const matchLen = ZSTD_count(ip, match, iend); + U32 const matchLen = (U32)ZSTD_count(ip, match, iend); if (matchLen >= ZSTD_MINMATCH_MIN) { - U32 const litLen = ip - anchor; - U32 const offset = ip - match; + U32 const litLen = (U32)(ip - anchor); + U32 const offset = (U32)(ip - match); ZSTD_Sequence const seq = { offset, litLen, matchLen, 0 }; @@ -56,7 +56,7 @@ size_t simpleExternalMatchFinder( } { ZSTD_Sequence const finalSeq = { - 0, iend - anchor, 0, 0 + 0, (U32)(iend - anchor), 0, 0 }; outSeqs[seqCount++] = finalSeq; } diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 2c43a0127c2..fc504ee8471 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -36,14 +36,14 @@ static size_t simpleExternalMatchFinder( while (ip + 4 < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); U32 const matchIndex = hashTable[hash]; - hashTable[hash] = ip - istart; + hashTable[hash] = (U32)(ip - istart); if (matchIndex != BADIDX) { const BYTE* const match = istart + matchIndex; - size_t const matchLen = ZSTD_count(ip, match, iend); + U32 const matchLen = (U32)ZSTD_count(ip, match, iend); if (matchLen >= ZSTD_MINMATCH_MIN) { - U32 const litLen = ip - anchor; - U32 const offset = ip - match; + U32 const litLen = (U32)(ip - anchor); + U32 const offset = (U32)(ip - match); ZSTD_Sequence const seq = { offset, litLen, matchLen, 0 }; @@ -58,7 +58,7 @@ static size_t simpleExternalMatchFinder( } { ZSTD_Sequence const finalSeq = { - 0, iend - anchor, 0, 0 + 0, (U32)(iend - anchor), 0, 0 }; outSeqs[seqCount++] = finalSeq; } @@ -82,7 +82,7 @@ size_t zstreamExternalMatchFinder( case EMF_ONE_BIG_SEQ: outSeqs[0].offset = 0; outSeqs[0].matchLength = 0; - outSeqs[0].litLength = srcSize; + outSeqs[0].litLength = (U32)(srcSize); return 1; case EMF_LOTS_OF_SEQS: return simpleExternalMatchFinder( From 901ab34d2c5b198706f40e8733030f6f3e5a4203 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 11:29:15 -0500 Subject: [PATCH 30/60] nit --- tests/zstreamtest.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 6fd35a0fd76..81afe9e4a92 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1913,6 +1913,9 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); + + free(dstBuf); + free(checkBuf); } DISPLAYLEVEL(3, "OK \n"); From da18842e4431217d11cb2c6a6a0ad2ba5431a9be Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 11:59:20 -0500 Subject: [PATCH 31/60] nits --- contrib/externalMatchfinder/main.c | 8 ++++---- contrib/externalMatchfinder/matchfinder.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 7d84123ddd8..003b4b55783 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -5,7 +5,7 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zstd_errors.h" -#include "matchfinder.h" // simpleExternalMatchFinder, simpleExternalMatchStateDestructor +#include "matchfinder.h" // simpleExternalMatchFinder int main(int argc, char *argv[]) { size_t res; @@ -35,17 +35,17 @@ int main(int argc, char *argv[]) { FILE *f = fopen(argv[1], "rb"); fseek(f, 0, SEEK_END); - long srcSize = ftell(f); + long const srcSize = ftell(f); fseek(f, 0, SEEK_SET); char *src = malloc(srcSize + 1); fread(src, srcSize, 1, f); fclose(f); - size_t dstSize = ZSTD_compressBound(srcSize); + size_t const dstSize = ZSTD_compressBound(srcSize); char *dst = malloc(dstSize); - size_t cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); + size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); if (ZSTD_isError(cSize)) { printf("ERROR: %s\n", ZSTD_getErrorName(cSize)); diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 1d52531fc11..b89a1a4a4e0 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -5,7 +5,8 @@ #include "zstd.h" size_t simpleExternalMatchFinder( - void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel From ec9b6b09623c8c8c609570ee38226cc5602e80cc Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 12:01:20 -0500 Subject: [PATCH 32/60] forward declare copySequencesToSeqStore functions in zstd_compress_internal.h --- lib/compress/zstd_compress.c | 17 +------------- lib/compress/zstd_compress_internal.h | 32 ++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a8b58747f54..bf6ca5615e1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -6117,9 +6117,6 @@ static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 return offBase; } -/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of - * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. - */ size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, @@ -6174,19 +6171,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, return 0; } -/* Returns the number of bytes to move the current read position back by. - * Only non-zero if we ended up splitting a sequence. - * Otherwise, it may return a ZSTD error if something went wrong. - * - * This function will attempt to scan through blockSize bytes - * represented by the sequences in @inSeqs, - * storing any (partial) sequences. - * - * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to - * avoid splitting a match, or to avoid splitting a match such that it would produce a match - * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. - */ -static size_t +size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const void* src, size_t blockSize) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index a119d544fa1..b81f3bf4408 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -156,12 +156,6 @@ typedef struct { size_t posInSrc; /* Number of bytes given by sequences provided so far */ } ZSTD_sequencePosition; -size_t -ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, - ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, - const void* src, size_t blockSize); - UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; typedef struct { @@ -1443,4 +1437,30 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); */ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. + */ +size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); + +/* Returns the number of bytes to move the current read position back by. + * Only non-zero if we ended up splitting a sequence. + * Otherwise, it may return a ZSTD error if something went wrong. + * + * This function will attempt to scan through blockSize bytes + * represented by the sequences in @inSeqs, + * storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. + */ +size_t +ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); + #endif /* ZSTD_COMPRESS_H */ From b8b554768dc43b36597a7519120cf0d56a3b1d44 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 12:06:46 -0500 Subject: [PATCH 33/60] nit --- tests/external_matchfinder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index fc504ee8471..6655ed5c038 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -73,7 +73,7 @@ size_t zstreamExternalMatchFinder( const void* dict, size_t dictSize, int compressionLevel ) { - EMF_testCase testCase = *((EMF_testCase*)externalMatchState); + EMF_testCase const testCase = *((EMF_testCase*)externalMatchState); memset(outSeqs, 0, outSeqsCapacity); switch (testCase) { From 38182daca2eec8fba7896e2bee9ef3a6fce66a7f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 12:11:14 -0500 Subject: [PATCH 34/60] nit --- tests/zstreamtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 81afe9e4a92..e9bf6b6681b 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -39,7 +39,7 @@ #include "seqgen.h" #include "util.h" #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ -#include "external_matchfinder.h" /* zstreamExternalMatchFinder */ +#include "external_matchfinder.h" /* zstreamExternalMatchFinder, EMF_testCase */ /*-************************************ * Constants From d9534ad6898cc9add4388102e743f6ed29b5f382 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 15:56:42 -0500 Subject: [PATCH 35/60] nits --- tests/external_matchfinder.c | 1 - tests/external_matchfinder.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 6655ed5c038..a87ce2841b9 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -1,7 +1,6 @@ #include "external_matchfinder.h" #include #include "zstd_compress_internal.h" -#include #define HSIZE 1024 static U32 const HLOG = 10; diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h index 279aa7ab53a..595ae02a567 100644 --- a/tests/external_matchfinder.h +++ b/tests/external_matchfinder.h @@ -14,7 +14,8 @@ typedef enum { } EMF_testCase; size_t zstreamExternalMatchFinder( - void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel From aabfc346ee513a17148504e40b94902296bc832f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 8 Dec 2022 13:45:43 -0500 Subject: [PATCH 36/60] Update copyright headers --- contrib/externalMatchfinder/main.c | 10 ++++++++++ contrib/externalMatchfinder/matchfinder.c | 10 ++++++++++ contrib/externalMatchfinder/matchfinder.h | 10 ++++++++++ tests/external_matchfinder.c | 10 ++++++++++ tests/external_matchfinder.h | 10 ++++++++++ 5 files changed, 50 insertions(+) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 003b4b55783..4effc587008 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + #include #include #include diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 7fabc1a4a1b..23f86615106 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + #include "zstd_compress_internal.h" #include "matchfinder.h" diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index b89a1a4a4e0..62666372603 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + #ifndef MATCHFINDER_H #define MATCHFINDER_H diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index a87ce2841b9..ed7bdee824d 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + #include "external_matchfinder.h" #include #include "zstd_compress_internal.h" diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h index 595ae02a567..dcf5a74979c 100644 --- a/tests/external_matchfinder.h +++ b/tests/external_matchfinder.h @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + #ifndef EXTERNAL_MATCHFINDER #define EXTERNAL_MATCHFINDER From f866d0ec25913c101eb5b5cee7176b23c4fa805a Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 8 Dec 2022 17:13:10 -0500 Subject: [PATCH 37/60] Fix CMake zstreamtest build --- build/cmake/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt index 53e0e7b173e..ed8ca164138 100644 --- a/build/cmake/tests/CMakeLists.txt +++ b/build/cmake/tests/CMakeLists.txt @@ -81,7 +81,7 @@ add_test(NAME fuzzer COMMAND fuzzer ${ZSTD_FUZZER_FLAGS}) # # zstreamtest # -add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c) +add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c ${TESTS_DIR}/external_matchfinder.c)) if (NOT MSVC) target_compile_options(zstreamtest PRIVATE "-Wno-deprecated-declarations") endif() From 3a0efdfbaddcb694c5b9c46a626c7cd510349784 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 8 Dec 2022 17:19:08 -0500 Subject: [PATCH 38/60] Fix copyright headers (again) --- contrib/externalMatchfinder/Makefile | 2 +- contrib/externalMatchfinder/main.c | 2 +- contrib/externalMatchfinder/matchfinder.c | 2 +- contrib/externalMatchfinder/matchfinder.h | 2 +- tests/external_matchfinder.c | 2 +- tests/external_matchfinder.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/contrib/externalMatchfinder/Makefile b/contrib/externalMatchfinder/Makefile index 46a5fc2dc08..fee06b1c5a1 100644 --- a/contrib/externalMatchfinder/Makefile +++ b/contrib/externalMatchfinder/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2018-present, Yann Collet, Facebook, Inc. +# Copyright (c) Yann Collet, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 4effc587008..8540259ac55 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 23f86615106..7a067d16604 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 62666372603..7b259d00124 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index ed7bdee824d..1f037abe67d 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h index dcf5a74979c..809c7bb24ab 100644 --- a/tests/external_matchfinder.h +++ b/tests/external_matchfinder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the From b1e2422d6eab906b07f2169e6e706fb31416dca9 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 8 Dec 2022 17:24:17 -0500 Subject: [PATCH 39/60] typo --- build/cmake/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt index ed8ca164138..250f0508f37 100644 --- a/build/cmake/tests/CMakeLists.txt +++ b/build/cmake/tests/CMakeLists.txt @@ -81,7 +81,7 @@ add_test(NAME fuzzer COMMAND fuzzer ${ZSTD_FUZZER_FLAGS}) # # zstreamtest # -add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c ${TESTS_DIR}/external_matchfinder.c)) +add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c ${TESTS_DIR}/external_matchfinder.c) if (NOT MSVC) target_compile_options(zstreamtest PRIVATE "-Wno-deprecated-declarations") endif() From b6fe61d57b4314fdbd743214400a97ded0a6c5b2 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 07:50:53 -0800 Subject: [PATCH 40/60] Add externalMatchfinder demo program to make contrib --- Makefile | 2 ++ contrib/externalMatchfinder/.gitignore | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 contrib/externalMatchfinder/.gitignore diff --git a/Makefile b/Makefile index 10fbe47f72d..d87fc76eb52 100644 --- a/Makefile +++ b/Makefile @@ -123,6 +123,7 @@ contrib: lib $(MAKE) -C contrib/seekable_format/examples all $(MAKE) -C contrib/seekable_format/tests test $(MAKE) -C contrib/largeNbDicts all + $(MAKE) -C contrib/externalMatchfinder all cd build/single_file_libs/ ; ./build_decoder_test.sh cd build/single_file_libs/ ; ./build_library_test.sh @@ -142,6 +143,7 @@ clean: $(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID) $(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID) $(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID) + $(Q)$(MAKE) -C contrib/externalMatchfinder $@ > $(VOID) $(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp* $(Q)$(RM) -r lz4 @echo Cleaning completed diff --git a/contrib/externalMatchfinder/.gitignore b/contrib/externalMatchfinder/.gitignore new file mode 100644 index 00000000000..46357ef5800 --- /dev/null +++ b/contrib/externalMatchfinder/.gitignore @@ -0,0 +1,2 @@ +# build artifacts +externalMatchfinder From 3de6d5caad10ad73dc8714ca51b0abc82b6638e4 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 07:51:01 -0800 Subject: [PATCH 41/60] Reduce memory consumption for small blockSize --- lib/compress/zstd_compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index bf6ca5615e1..e1b4d0a12e4 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1533,7 +1533,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const maxNbExternalSeq = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); size_t const externalSeqSpace = useExternalMatchFinder ? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) : 0; @@ -2042,7 +2042,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, /* reserve space for block-level external sequences */ if (params->useExternalMatchFinder) { - size_t const maxNbExternalSeq = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq; zc->externalMatchCtx.seqBuffer = (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); From fc37297c398855b1e23911e31cb196e79d4b4811 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 07:51:03 -0800 Subject: [PATCH 42/60] ZSTD_postProcessExternalMatchFinderResult nits --- lib/compress/zstd_compress.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e1b4d0a12e4..4fc120c442b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2900,7 +2900,10 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) } /* ZSTD_postProcessExternalMatchFinderResult() : - * Validates and post-processes sequences obtained through the external matchfinder API. + * Validates and post-processes sequences obtained through the external matchfinder API: + * - Checks whether nbExternalSeqs represents an error condition. + * - Appends a block delimiter to outSeqs if one is not already present. + * See zstd.h for context regarding block delimiters. * Returns the number of sequences after post-processing, or an error code. */ static size_t ZSTD_postProcessExternalMatchFinderResult( ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, int emptySrc @@ -2921,24 +2924,27 @@ static size_t ZSTD_postProcessExternalMatchFinderResult( if (emptySrc) { ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); return 1; - } else { + } + + { ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; - /* Check if lastSeq is a block delimiter, append one if not */ + /* We can return early if lastSeq is already a block delimiter. */ if (lastSeq.offset == 0 && lastSeq.matchLength == 0) { return nbExternalSeqs; - } else { - /* This error condition is only possible if the external matchfinder - * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ - RETURN_ERROR_IF( - nbExternalSeqs == outSeqsCapacity, - externalMatchFinder_failed, - "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" - ); - - ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); - return nbExternalSeqs + 1; } + + /* This error condition is only possible if the external matchfinder + * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ + RETURN_ERROR_IF( + nbExternalSeqs == outSeqsCapacity, + externalMatchFinder_failed, + "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" + ); + + /* lastSeq is not a block delimiter, so we need to append one. */ + ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); + return nbExternalSeqs + 1; } } From 029ba01da9ea723c6fe748ceeca3f8b552d0f16d Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 07:51:05 -0800 Subject: [PATCH 43/60] test sum(matchlen) + sum(litlen) == srcSize in debug builds --- lib/compress/zstd_compress.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4fc120c442b..fd7aa86b83b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2904,9 +2904,10 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) * - Checks whether nbExternalSeqs represents an error condition. * - Appends a block delimiter to outSeqs if one is not already present. * See zstd.h for context regarding block delimiters. + * - In debug builds, verify that the external litLengths + matchLengths add to srcSize. * Returns the number of sequences after post-processing, or an error code. */ static size_t ZSTD_postProcessExternalMatchFinderResult( - ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, int emptySrc + ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize ) { RETURN_ERROR_IF( nbExternalSeqs > outSeqsCapacity, @@ -2916,16 +2917,35 @@ static size_t ZSTD_postProcessExternalMatchFinderResult( ); RETURN_ERROR_IF( - nbExternalSeqs == 0 && !emptySrc, + nbExternalSeqs == 0 && srcSize > 0, externalMatchFinder_failed, "External matchfinder produced zero sequences for a non-empty src buffer!" ); - if (emptySrc) { + if (srcSize == 0) { ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); return 1; } +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) + { + size_t coveredBytes, idx; + + for (idx = 0; idx < nbExternalSeqs; idx++) { + /* We already know that nbExternalSeqs <= outSeqsCapacity */ + assert(idx < outSeqsCapacity); + coveredBytes += outSeqs[idx].litLength; + coveredBytes += outSeqs[idx].matchLength; + } + + RETURN_ERROR_IF( + coveredBytes != srcSize, + externalMatchFinder_failed, + "External matchfinder produced an incorrect parse!" + ); + } +#endif + { ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; @@ -3065,7 +3085,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->externalMatchCtx.seqBuffer, nbExternalSeqs, zc->externalMatchCtx.seqBufferCapacity, - srcSize == 0 /* emptySrc */ + srcSize ); if (!ZSTD_isError(nbPostProcessedSeqs)) { From c0be839c94f85ba2b90e5aa3103ed3e7dbf789ec Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 07:51:29 -0800 Subject: [PATCH 44/60] refExternalMatchFinder -> registerExternalMatchFinder --- contrib/externalMatchfinder/main.c | 2 +- lib/compress/zstd_compress.c | 2 +- lib/compress/zstd_compress_internal.h | 2 +- lib/zstd.h | 2 +- tests/zstreamtest.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 8540259ac55..cf68b24a9af 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) { int simpleExternalMatchState = 0xdeadbeef; // Here is the crucial bit of code! - ZSTD_refExternalMatchFinder( + ZSTD_registerExternalMatchFinder( zc, &simpleExternalMatchState, simpleExternalMatchFinder diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index fd7aa86b83b..dcefa2cfc23 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -6731,7 +6731,7 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); } -void ZSTD_refExternalMatchFinder( +void ZSTD_registerExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder ) { diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index b81f3bf4408..10ff3be11f2 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -353,7 +353,7 @@ struct ZSTD_CCtx_params_s { /* Indicates whether an external matchfinder has been referenced. * Users can't set this externally. - * It is set internally in ZSTD_refExternalMatchFinder(). */ + * It is set internally in ZSTD_registerExternalMatchFinder(). */ int useExternalMatchFinder; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ diff --git a/lib/zstd.h b/lib/zstd.h index 5e87aff3f5c..efaaf2eda21 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1528,7 +1528,7 @@ typedef size_t ZSTD_externalMatchFinder_F ( ); ZSTDLIB_STATIC_API void -ZSTD_refExternalMatchFinder( +ZSTD_registerExternalMatchFinder( ZSTD_CCtx* cctx, void* externalMatchState, ZSTD_externalMatchFinder_F* externalMatchFinder diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index e9bf6b6681b..c2b89fd7628 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1848,7 +1848,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) /* Reference external matchfinder outside the test loop to * check that the reference is preserved across compressions */ - ZSTD_refExternalMatchFinder( + ZSTD_registerExternalMatchFinder( zc, &externalMatchState, zstreamExternalMatchFinder From f4685d2a375320002a169f0ffa8ad5d6930741bd Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 07:51:32 -0800 Subject: [PATCH 45/60] C90 nit --- lib/compress/zstd_compress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index dcefa2cfc23..e50e4e6b8ae 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2929,7 +2929,8 @@ static size_t ZSTD_postProcessExternalMatchFinderResult( #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) { - size_t coveredBytes, idx; + size_t coveredBytes = 0; + size_t idx = 0; for (idx = 0; idx < nbExternalSeqs; idx++) { /* We already know that nbExternalSeqs <= outSeqsCapacity */ From 153be31b63b4e2d6caf4f7af5b2b8ed5bed31a27 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 11:44:19 -0800 Subject: [PATCH 46/60] zstreamtest nits --- tests/zstreamtest.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index c2b89fd7628..496885e6624 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1841,9 +1841,10 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) size_t const checkBufSize = CNBufferSize; BYTE* checkBuf = (BYTE*)malloc(checkBufSize); int enableFallback; - size_t res; EMF_testCase externalMatchState; + CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed"); + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); /* Reference external matchfinder outside the test loop to @@ -1872,6 +1873,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) /* Test external matchfinder success scenarios */ for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) { + size_t res; externalMatchState = EMF_successCases[testCaseId]; ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); @@ -1883,6 +1885,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) /* Test external matchfinder failure scenarios */ for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) { + size_t res; externalMatchState = EMF_failureCases[testCaseId]; ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); @@ -1900,12 +1903,15 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) } /* Test compression with external matchfinder + empty src buffer */ - externalMatchState = EMF_ZERO_SEQS; - ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); - CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); - res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0); - CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); - CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!"); + { + size_t res; + externalMatchState = EMF_ZERO_SEQS; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0); + CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); + CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!"); + } } /* Test that reset clears the external matchfinder */ From beab112dcef72df55df5d59050c97c6bdaf6d70b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 12:31:55 -0800 Subject: [PATCH 47/60] contrib nits --- contrib/externalMatchfinder/main.c | 46 ++++++++++++----------- contrib/externalMatchfinder/matchfinder.c | 2 +- tests/external_matchfinder.c | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index cf68b24a9af..2ad0e408e34 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -11,21 +11,28 @@ #include #include #include +#include #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zstd_errors.h" #include "matchfinder.h" // simpleExternalMatchFinder -int main(int argc, char *argv[]) { - size_t res; +#define CHECK(res) \ +do { \ + if (ZSTD_isError(res)) { \ + printf("ERROR: %s\n", ZSTD_getErrorName(res)); \ + return 1; \ + } \ +} while (0) \ +int main(int argc, char *argv[]) { if (argc != 2) { printf("Usage: exampleMatchfinder \n"); return 1; } - ZSTD_CCtx* zc = ZSTD_createCCtx(); + ZSTD_CCtx* const zc = ZSTD_createCCtx(); int simpleExternalMatchState = 0xdeadbeef; @@ -36,40 +43,33 @@ int main(int argc, char *argv[]) { simpleExternalMatchFinder ); - res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1); - - if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorName(res)); - return 1; + { + size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1); + CHECK(res); } FILE *f = fopen(argv[1], "rb"); + assert(f); fseek(f, 0, SEEK_END); long const srcSize = ftell(f); fseek(f, 0, SEEK_SET); - char *src = malloc(srcSize + 1); + char* const src = malloc(srcSize + 1); fread(src, srcSize, 1, f); fclose(f); size_t const dstSize = ZSTD_compressBound(srcSize); - char *dst = malloc(dstSize); + char* const dst = malloc(dstSize); size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); + CHECK(cSize); - if (ZSTD_isError(cSize)) { - printf("ERROR: %s\n", ZSTD_getErrorName(cSize)); - return 1; - } - - char *val = malloc(srcSize); - res = ZSTD_decompress(val, srcSize, dst, cSize); - - ZSTD_freeCCtx(zc); + char* const val = malloc(srcSize); + assert(val); - if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorName(res)); - return 1; + { + size_t const res = ZSTD_decompress(val, srcSize, dst, cSize); + CHECK(res); } if (memcmp(src, val, srcSize) == 0) { @@ -87,4 +87,6 @@ int main(int argc, char *argv[]) { } return 1; } + + ZSTD_freeCCtx(zc); } diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 7a067d16604..70c61bc82ef 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -41,7 +41,7 @@ size_t simpleExternalMatchFinder( hashTable[i] = BADIDX; } } - while (ip + 4 < iend) { + while (ip + MLS < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); U32 const matchIndex = hashTable[hash]; hashTable[hash] = (U32)(ip - istart); diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 1f037abe67d..1a080b9a12c 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -42,7 +42,7 @@ static size_t simpleExternalMatchFinder( hashTable[i] = BADIDX; } } - while (ip + 4 < iend) { + while (ip + MLS < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); U32 const matchIndex = hashTable[hash]; hashTable[hash] = (U32)(ip - istart); From b6d48a2c427625b14cf1505bec433855e15c8554 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 19 Dec 2022 12:35:05 -0800 Subject: [PATCH 48/60] contrib nits --- contrib/externalMatchfinder/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 2ad0e408e34..6f084578e5e 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -19,7 +19,7 @@ #include "matchfinder.h" // simpleExternalMatchFinder #define CHECK(res) \ -do { \ +do { \ if (ZSTD_isError(res)) { \ printf("ERROR: %s\n", ZSTD_getErrorName(res)); \ return 1; \ @@ -76,7 +76,6 @@ int main(int argc, char *argv[]) { printf("Compression and decompression were successful!\n"); printf("Original size: %lu\n", srcSize); printf("Compressed size: %lu\n", cSize); - return 0; } else { printf("ERROR: input and validation buffers don't match!\n"); for (int i = 0; i < srcSize; i++) { @@ -89,4 +88,8 @@ int main(int argc, char *argv[]) { } ZSTD_freeCCtx(zc); + free(src); + free(dst); + free(val); + return 0; } From a40cea45c44ebabb7fad06c72b0a0ae648bb4b6b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 21 Dec 2022 10:53:14 -0800 Subject: [PATCH 49/60] allow block splitter + external matchfinder, refactor --- lib/compress/zstd_compress.c | 58 +++++++++++++++++------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e50e4e6b8ae..f06b51f2d08 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3018,21 +3018,12 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); /* External matchfinder + LDM is technically possible, just not implemented yet. - * We need to revisit soon and implement it. */ - if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { - RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchFinder, - parameter_combination_unsupported, - "Block splitting with external matchfinder enabled is not currently supported. " - "Note: block splitting is enabled by default at high compression levels." - ); - } else { - RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchFinder, - parameter_combination_unsupported, - "Long-distance matching with external matchfinder enabled is not currently supported." - ); - } + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchFinder, + parameter_combination_unsupported, + "Long-distance matching with external matchfinder enabled is not currently supported." + ); /* Updates ldmSeqStore.pos */ lastLLSize = @@ -3089,29 +3080,34 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) srcSize ); + /* Return early if there is no error, since we don't need to worry about last literals */ if (!ZSTD_isError(nbPostProcessedSeqs)) { ZSTD_sequencePosition seqPos = {0,0,0}; ZSTD_copySequencesToSeqStoreExplicitBlockDelim( zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize ); ms->ldmSeqStore = NULL; - lastLLSize = 0; DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); - } else { - if (zc->appliedParams.enableMatchFinderFallback) { - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, - zc->appliedParams.useRowMatchFinder, - dictMode); - ms->ldmSeqStore = NULL; - DEBUGLOG( - 5, - "External matchfinder returned error code %lu. Falling back to internal matchfinder.", - (unsigned long)nbExternalSeqs - ); - lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); - } else { - return nbPostProcessedSeqs; /* return an error */ - } + return ZSTDbss_compress; + } + + /* Propagate the error if fallback is disabled */ + if (!zc->appliedParams.enableMatchFinderFallback) { + + return nbPostProcessedSeqs; + } + + /* Fallback to software matchfinder */ + { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + DEBUGLOG( + 5, + "External matchfinder returned error code %lu. Falling back to internal matchfinder.", + (unsigned long)nbExternalSeqs + ); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } } } else { /* not long range mode and no external matchfinder */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, From 31260d8f0573ea5c01f290397be2c8ff6bc8217f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 21 Dec 2022 12:10:45 -0800 Subject: [PATCH 50/60] add windowSize param --- contrib/externalMatchfinder/matchfinder.c | 15 +++++--- contrib/externalMatchfinder/matchfinder.h | 3 +- lib/compress/zstd_compress.c | 43 ++++++++++------------- lib/zstd.h | 3 +- tests/external_matchfinder.c | 21 +++++++---- tests/external_matchfinder.h | 3 +- 6 files changed, 48 insertions(+), 40 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 70c61bc82ef..04e3b8a514e 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -21,7 +21,8 @@ size_t simpleExternalMatchFinder( ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, - int compressionLevel + int compressionLevel, + size_t windowSize ) { const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; @@ -55,10 +56,14 @@ size_t simpleExternalMatchFinder( ZSTD_Sequence const seq = { offset, litLen, matchLen, 0 }; - outSeqs[seqCount++] = seq; - ip += matchLen; - anchor = ip; - continue; + + /* Note: it's crucial to stay within the window size! */ + if (offset <= windowSize) { + outSeqs[seqCount++] = seq; + ip += matchLen; + anchor = ip; + continue; + } } } diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 7b259d00124..c88ae46f35c 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -19,7 +19,8 @@ size_t simpleExternalMatchFinder( ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, - int compressionLevel + int compressionLevel, + size_t windowSize ); #endif diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f06b51f2d08..500aa66baf3 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -278,6 +278,15 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; } +/* Enables validation for external sequences in debug builds. */ +static int ZSTD_resolveExternalSequenceValidation(int mode) { +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) + return 1; +#else + return mode; +#endif +} + /* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged. * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */ static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) { @@ -301,6 +310,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( } cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams); cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); + cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences); assert(!ZSTD_checkCParams(cParams)); return cctxParams; } @@ -362,6 +372,7 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams); cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, ¶ms->cParams); cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams); + cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences); DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d", cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm); } @@ -2904,7 +2915,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) * - Checks whether nbExternalSeqs represents an error condition. * - Appends a block delimiter to outSeqs if one is not already present. * See zstd.h for context regarding block delimiters. - * - In debug builds, verify that the external litLengths + matchLengths add to srcSize. * Returns the number of sequences after post-processing, or an error code. */ static size_t ZSTD_postProcessExternalMatchFinderResult( ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize @@ -2927,26 +2937,6 @@ static size_t ZSTD_postProcessExternalMatchFinderResult( return 1; } -#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) - { - size_t coveredBytes = 0; - size_t idx = 0; - - for (idx = 0; idx < nbExternalSeqs; idx++) { - /* We already know that nbExternalSeqs <= outSeqsCapacity */ - assert(idx < outSeqsCapacity); - coveredBytes += outSeqs[idx].litLength; - coveredBytes += outSeqs[idx].matchLength; - } - - RETURN_ERROR_IF( - coveredBytes != srcSize, - externalMatchFinder_failed, - "External matchfinder produced an incorrect parse!" - ); - } -#endif - { ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; @@ -3018,7 +3008,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); /* External matchfinder + LDM is technically possible, just not implemented yet. - * We need to revisit soon and implement it. */ + * We need to revisit soon and implement it. */ RETURN_ERROR_IF( zc->appliedParams.useExternalMatchFinder, parameter_combination_unsupported, @@ -3064,13 +3054,16 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ); assert(zc->externalMatchCtx.mFinder != NULL); - { size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)( + { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog; + + size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)( zc->externalMatchCtx.mState, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0, /* dict and dictSize, currently not supported */ - zc->appliedParams.compressionLevel + zc->appliedParams.compressionLevel, + windowSize ); size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult( @@ -3093,7 +3086,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* Propagate the error if fallback is disabled */ if (!zc->appliedParams.enableMatchFinderFallback) { - return nbPostProcessedSeqs; } @@ -5902,6 +5894,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams); params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams); params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams); + params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences); #ifdef ZSTD_MULTITHREAD if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { diff --git a/lib/zstd.h b/lib/zstd.h index efaaf2eda21..9713951ddb4 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1524,7 +1524,8 @@ typedef size_t ZSTD_externalMatchFinder_F ( ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, - int compressionLevel + int compressionLevel, + size_t windowSize ); ZSTDLIB_STATIC_API void diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 1a080b9a12c..c8b9a4d7556 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -22,7 +22,8 @@ static size_t simpleExternalMatchFinder( ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, - int compressionLevel + int compressionLevel, + size_t windowSize ) { const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; @@ -56,10 +57,14 @@ static size_t simpleExternalMatchFinder( ZSTD_Sequence const seq = { offset, litLen, matchLen, 0 }; - outSeqs[seqCount++] = seq; - ip += matchLen; - anchor = ip; - continue; + + /* Note: it's crucial to stay within the window size! */ + if (offset <= windowSize) { + outSeqs[seqCount++] = seq; + ip += matchLen; + anchor = ip; + continue; + } } } @@ -80,7 +85,8 @@ size_t zstreamExternalMatchFinder( ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, - int compressionLevel + int compressionLevel, + size_t windowSize ) { EMF_testCase const testCase = *((EMF_testCase*)externalMatchState); memset(outSeqs, 0, outSeqsCapacity); @@ -99,7 +105,8 @@ size_t zstreamExternalMatchFinder( outSeqs, outSeqsCapacity, src, srcSize, dict, dictSize, - compressionLevel + compressionLevel, + windowSize ); case EMF_SMALL_ERROR: return outSeqsCapacity + 1; diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h index 809c7bb24ab..c24c03b7619 100644 --- a/tests/external_matchfinder.h +++ b/tests/external_matchfinder.h @@ -28,7 +28,8 @@ size_t zstreamExternalMatchFinder( ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, - int compressionLevel + int compressionLevel, + size_t windowSize ); #endif // EXTERNAL_MATCHFINDER From f89cedc21d0a834f5444fda6a2309d638be9ae5e Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 21 Dec 2022 12:41:14 -0800 Subject: [PATCH 51/60] add contrib/externalMatchfinder/README.md --- contrib/externalMatchfinder/README.md | 14 ++++++++++++++ lib/compress/zstd_compress.c | 1 + lib/compress/zstd_compress_internal.h | 1 + 3 files changed, 16 insertions(+) create mode 100644 contrib/externalMatchfinder/README.md diff --git a/contrib/externalMatchfinder/README.md b/contrib/externalMatchfinder/README.md new file mode 100644 index 00000000000..cb7d49d97a8 --- /dev/null +++ b/contrib/externalMatchfinder/README.md @@ -0,0 +1,14 @@ +externalMatchfinder +===================== + +`externalMatchfinder` is a test tool for the external matchfinder API. +It demonstrates how to use the API to perform a simple round-trip test. + +A sample matchfinder is provided in matchfinder.c, but the user can swap +this out with a different one if desired. The sample matchfinder implements +LZ compression with a 1KB hashtable. Dictionary compression is not currently supported. + +Command line : +``` +externalMatchfinder filename +``` diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 500aa66baf3..3784866eab1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -281,6 +281,7 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, /* Enables validation for external sequences in debug builds. */ static int ZSTD_resolveExternalSequenceValidation(int mode) { #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) + (void)mode; return 1; #else return mode; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 10ff3be11f2..f755a1f79f7 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -1439,6 +1439,7 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. + * Note that the block delimiter must include the last literals of the block. */ size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, From 628755c16313a8ea6297ada78321f5e3f6a127cb Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 21 Dec 2022 14:37:16 -0800 Subject: [PATCH 52/60] docs --- lib/zstd.h | 149 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 123 insertions(+), 26 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index 9713951ddb4..cf942b2f1f2 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1510,31 +1510,6 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize); -/* Block-level sequence compression API */ -/* @nocommit document */ - -#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) - -/* @nocommit document these constraints: - * - outSeqsCapacity >= (blockSize / MINMATCH) + 1 - * - srcSize <= 128 KB - * - dictSize is not bounded */ -typedef size_t ZSTD_externalMatchFinder_F ( - void* externalMatchState, - ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, - int compressionLevel, - size_t windowSize -); - -ZSTDLIB_STATIC_API void -ZSTD_registerExternalMatchFinder( - ZSTD_CCtx* cctx, - void* externalMatchState, - ZSTD_externalMatchFinder_F* externalMatchFinder -); - /****************************************/ /*! ZSTD_writeSkippableFrame() : @@ -2072,7 +2047,18 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 -/* @nocommit document */ +/* ZSTD_c_enableMatchFinderFallback + * Allowed values are 0 (disable) and 1 (enable). The default setting is 0. + * + * Controls whether zstd will fall back to an internal matchfinder if an + * external matchfinder is registered and returns an error code. This fallback is + * block-by-block: the internal matchfinder will only be called for blocks where + * the external matchfinder returns an error code. Fallback compression will + * follow any other cParam settings, such as compression level, the same as in a + * normal (fully-internal) compression operation. + * + * The user is strongly encouraged to read the full external matchfinder API + * documentation (below) before setting this parameter. */ #define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17 /*! ZSTD_CCtx_getParameter() : @@ -2707,6 +2693,117 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ +/* ********************** EXTERNAL MATCHFINDER API ********************** + * + * OVERVIEW + * This API allows users to replace the zstd internal block-level matchfinder + * with an external matchfinder function. Potential applications of the API + * include hardware-accelerated matchfinders and matchfinders specialized to + * particular types of data. + * + * USAGE + * The user is responsible for implementing a function of type + * ZSTD_externalMatchFinder_F. For each block, zstd will pass the following + * arguments to the user-provided function: + * + * - externalMatchState: a pointer to a user-managed state for the external + * matchfinder. + * + * - outSeqs, outSeqsCapacity: an output buffer for sequences produced by the + * external matchfinder. outSeqsCapacity is guaranteed >= + * ZSTD_sequenceBound(srcSize). The memory backing outSeqs is managed by + * the CCtx. + * + * - src, srcSize: an input buffer which the external matchfinder must parse + * into sequences. srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. + * + * - dict, dictSize: a history buffer, which may be empty, which the external + * matchfinder may reference as it produces sequences for the src buffer. + * Currently, zstd will always pass dictSize == 0 into external matchfinders, + * but this will change in the future. + * + * - compressionLevel: a signed integer representing the zstd compression level + * set by the user for the current operation. The external matchfinder may + * choose to use this information to change its compression strategy and + * speed/ratio tradeoff. Note: The compression level does not reflect zstd + * parameters set through the advanced API. + * + * - windowSize: a size_t representing the maximum allowed offset for external + * sequences. Note that sequence offsets are sometimes allowed to exceed the + * windowSize if a dictionary is present, see doc/zstd_compression_format.md + * for details. + * + * The user-provided function shall return a size_t representing the number of + * sequences written to outSeqs. This return value will be treated as an error + * code if it is greater than outSeqsCapacity. The return value must be non-zero + * if srcSize is non-zero. The ZSTD_EXTERNAL_MATCHFINDER_ERROR macro is provided + * for convenience, but any value greater than outSeqsCapacity will be treated as + * an error code. + * + * If the user-provided function does not return an error code, the sequences + * written to outSeqs must be a valid parse of the src buffer. Data corruption may + * occur if the parse is not valid. A parse is defined to be valid if the + * following conditions hold: + * - The sum of matchLengths and literalLengths is equal to srcSize. + * - All sequences in the parse have matchLength != 0, except for the final + * sequence. matchLength is not constrained for the final sequence. + * - All offsets respect the windowSize parameter as specified in + * doc/zstd_compression_format.md. + * + * zstd will only validate these conditions (and fail compression if they do not + * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence + * validation has a performance cost. + * + * If the user-provided function returns an error, zstd will either fall back + * to an internal matchfinder or fail the compression operation. The user can + * choose between the two behaviors by setting the + * ZSTD_c_enableMatchFinderFallback cParam. Fallback compression will follow any + * other cParam settings, such as compression level, the same as in a normal + * compression operation. + * + * The user shall instruct zstd to use a particular ZSTD_externalMatchFinder_F + * function by calling ZSTD_refExternalMatchFinder(cctx, externalMatchState, + * externalMatchFinder). This setting will persist until the next parameter reset + * of the CCtx. + * + * The externalMatchState must be initialized by the user before calling + * ZSTD_refExternalMatchFinder. The user is responsible for destroying the + * externalMatchState. + * + * LIMITATIONS + * This API is currently incompatible with long-distance matching. As mentioned + * above, history buffers (stream history, dictionaries) are currently not + * supported. We plan to remove these limitations in the future. + */ + +#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) + +typedef size_t ZSTD_externalMatchFinder_F ( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +); + +/*! ZSTD_registerExternalMatchFinder() : + * Instruct zstd to use an external matchfinder function. + * + * The externalMatchState must be initialized by the caller, and the caller is + * responsible for managing its lifetime. This parameter is sticky across + * compressions. It will remain set until the user explicitly resets compression + * parameters. + * + * The user is strongly encouraged to read the full API documentation (above) + * before calling this function. */ +ZSTDLIB_STATIC_API void +ZSTD_registerExternalMatchFinder( + ZSTD_CCtx* cctx, + void* externalMatchState, + ZSTD_externalMatchFinder_F* externalMatchFinder +); + #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ #if defined (__cplusplus) From b12c8e45ecd5daa75d39d022f0eb9ba362a54850 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 21 Dec 2022 14:53:44 -0800 Subject: [PATCH 53/60] go back to old RLE heuristic because of the first block issue --- lib/compress/zstd_compress.c | 14 ++++---------- lib/zstd.h | 1 - 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3784866eab1..b7f23482e55 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3247,18 +3247,12 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { * This is just a heuristic based on the compressibility. * It may return both false positives and false negatives. */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* appliedParams) +static int ZSTD_maybeRLE(seqStore_t const* seqStore) { size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - if (appliedParams->useExternalMatchFinder) { - /* We shouldn't make any assumptions about how an external matchfinder - * will compress an RLE block. */ - return 1; - } else { - return nbSeqs < 4 && nbLits < 10; - } + return nbSeqs < 4 && nbLits < 10; } static void @@ -4108,7 +4102,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore, &zc->appliedParams) && + ZSTD_maybeRLE(&zc->seqStore) && ZSTD_isRLE((BYTE const*)src, srcSize)) { return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); @@ -6448,7 +6442,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore, &cctx->appliedParams) && + ZSTD_maybeRLE(&cctx->seqStore) && ZSTD_isRLE(ip, blockSize)) { /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." diff --git a/lib/zstd.h b/lib/zstd.h index cf942b2f1f2..1728c4e17ca 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1510,7 +1510,6 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize); -/****************************************/ /*! ZSTD_writeSkippableFrame() : * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. From 12b7ca1db6074d6336bb5360af9a043504aa60c2 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 21 Dec 2022 14:56:41 -0800 Subject: [PATCH 54/60] fix initializer element is not a constant expression --- contrib/externalMatchfinder/matchfinder.c | 2 +- tests/external_matchfinder.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 04e3b8a514e..287b721d374 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -14,7 +14,7 @@ #define HSIZE 1024 static U32 const HLOG = 10; static U32 const MLS = 4; -static U32 const BADIDX = (1 << 31); +static U32 const BADIDX = 0xffffffff; size_t simpleExternalMatchFinder( void* externalMatchState, diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index c8b9a4d7556..6b7d55e6701 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -15,7 +15,7 @@ #define HSIZE 1024 static U32 const HLOG = 10; static U32 const MLS = 4; -static U32 const BADIDX = (1 << 31); +static U32 const BADIDX = 0xffffffff; static size_t simpleExternalMatchFinder( void* externalMatchState, From 4b15448db05db6b6ad1641abc206f763c74487c3 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 21 Dec 2022 15:08:08 -0800 Subject: [PATCH 55/60] ref contrib from zstd.h --- lib/zstd.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/zstd.h b/lib/zstd.h index 1728c4e17ca..b338f54bda6 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -2700,6 +2700,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc * include hardware-accelerated matchfinders and matchfinders specialized to * particular types of data. * + * See contrib/externalMatchfinder for an example program employing the + * external matchfinder API. + * * USAGE * The user is responsible for implementing a function of type * ZSTD_externalMatchFinder_F. For each block, zstd will pass the following From c2574e74d81d732a66784efaff09859abb52f66f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 22 Dec 2022 15:40:38 -0800 Subject: [PATCH 56/60] extremely pedantic compiler warning fix, meson fix, typo fix --- build/meson/tests/meson.build | 6 ++++-- contrib/externalMatchfinder/main.c | 24 +++++++++++++++++------- lib/zstd.h | 4 ++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/build/meson/tests/meson.build b/build/meson/tests/meson.build index f7ba5310188..e70b73432c5 100644 --- a/build/meson/tests/meson.build +++ b/build/meson/tests/meson.build @@ -65,8 +65,10 @@ fuzzer = executable('fuzzer', dependencies: [ testcommon_dep, thread_dep ], install: false) -zstreamtest_sources = [join_paths(zstd_rootdir, 'tests/seqgen.c'), - join_paths(zstd_rootdir, 'tests/zstreamtest.c')] +zstreamtest_sources = [ + join_paths(zstd_rootdir, 'tests/seqgen.c'), + join_paths(zstd_rootdir, 'tests/zstreamtest.c'), + join_paths(zstd_rootdir, 'tests/external_matchfinder.c')] zstreamtest = executable('zstreamtest', zstreamtest_sources, include_directories: test_includes, diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 6f084578e5e..e7b11037198 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -50,13 +50,23 @@ int main(int argc, char *argv[]) { FILE *f = fopen(argv[1], "rb"); assert(f); - fseek(f, 0, SEEK_END); - long const srcSize = ftell(f); - fseek(f, 0, SEEK_SET); + { + int const ret = fseek(f, 0, SEEK_END); + assert(ret == 0); + } + size_t const srcSize = ftell(f); + { + int const ret = fseek(f, 0, SEEK_SET); + assert(ret == 0); + } char* const src = malloc(srcSize + 1); - fread(src, srcSize, 1, f); - fclose(f); + { + size_t const ret = fread(src, srcSize, 1, f); + assert(ret == 1); + int const ret2 = fclose(f); + assert(ret2 == 0); + } size_t const dstSize = ZSTD_compressBound(srcSize); char* const dst = malloc(dstSize); @@ -78,9 +88,9 @@ int main(int argc, char *argv[]) { printf("Compressed size: %lu\n", cSize); } else { printf("ERROR: input and validation buffers don't match!\n"); - for (int i = 0; i < srcSize; i++) { + for (size_t i = 0; i < srcSize; i++) { if (src[i] != val[i]) { - printf("First bad index: %d\n", i); + printf("First bad index: %zu\n", i); break; } } diff --git a/lib/zstd.h b/lib/zstd.h index b338f54bda6..6c280365718 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -2764,12 +2764,12 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc * compression operation. * * The user shall instruct zstd to use a particular ZSTD_externalMatchFinder_F - * function by calling ZSTD_refExternalMatchFinder(cctx, externalMatchState, + * function by calling ZSTD_registerExternalMatchFinder(cctx, externalMatchState, * externalMatchFinder). This setting will persist until the next parameter reset * of the CCtx. * * The externalMatchState must be initialized by the user before calling - * ZSTD_refExternalMatchFinder. The user is responsible for destroying the + * ZSTD_registerExternalMatchFinder. The user is responsible for destroying the * externalMatchState. * * LIMITATIONS From 8052b104e5158dde469f097841c60ea00e676d00 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 28 Dec 2022 08:29:08 -0800 Subject: [PATCH 57/60] Additional docs on API limitations --- lib/zstd.h | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index 6c280365718..14a7d23066d 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -2694,7 +2694,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc /* ********************** EXTERNAL MATCHFINDER API ********************** * - * OVERVIEW + * *** OVERVIEW *** * This API allows users to replace the zstd internal block-level matchfinder * with an external matchfinder function. Potential applications of the API * include hardware-accelerated matchfinders and matchfinders specialized to @@ -2703,7 +2703,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc * See contrib/externalMatchfinder for an example program employing the * external matchfinder API. * - * USAGE + * *** USAGE *** * The user is responsible for implementing a function of type * ZSTD_externalMatchFinder_F. For each block, zstd will pass the following * arguments to the user-provided function: @@ -2772,10 +2772,32 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc * ZSTD_registerExternalMatchFinder. The user is responsible for destroying the * externalMatchState. * - * LIMITATIONS - * This API is currently incompatible with long-distance matching. As mentioned - * above, history buffers (stream history, dictionaries) are currently not - * supported. We plan to remove these limitations in the future. + * *** LIMITATIONS *** + * External matchfinders are compatible with all zstd compression APIs. There are + * only two limitations. + * + * First, the ZSTD_c_enableLongDistanceMatching cParam is not supported. + * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an + * external matchfinder. + * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in + * some cases (see its documentation for details). Users must explicitly set + * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an + * external matchfinder is registered. + * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default + * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should + * check the docs on ZSTD_c_enableLongDistanceMatching whenever the external + * matchfinder API is used in conjunction with advanced settings (like windowLog). + * + * Second, history buffers are not supported. Concretely, zstd will always pass + * dictSize == 0 to the external matchfinder (for now). This has two implications: + * - Dictionaries are not supported. Compression will *not* fail if the user + * references a dictionary, but the dictionary won't have any effect. + * - Stream history is not supported. All compression APIs, including streaming + * APIs, work with the external matchfinder, but the external matchfinder won't + * receive any history from the previous block. Each block is an independent chunk. + * + * Long-term, we plan to overcome both limitations. There is no technical blocker to + * overcoming them. It is purely a question of engineering effort. */ #define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) From 1e60543110265617d8d2366f5418163f56162925 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 28 Dec 2022 10:08:39 -0800 Subject: [PATCH 58/60] minor nits --- contrib/externalMatchfinder/main.c | 2 ++ tests/zstreamtest.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index e7b11037198..db3cf8a9f39 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -61,6 +61,7 @@ int main(int argc, char *argv[]) { } char* const src = malloc(srcSize + 1); + assert(src); { size_t const ret = fread(src, srcSize, 1, f); assert(ret == 1); @@ -70,6 +71,7 @@ int main(int argc, char *argv[]) { size_t const dstSize = ZSTD_compressBound(srcSize); char* const dst = malloc(dstSize); + assert(dst); size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); CHECK(cSize); diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 496885e6624..664ff632ac1 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1837,9 +1837,9 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++); { size_t const dstBufSize = ZSTD_compressBound(CNBufferSize); - BYTE* dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize)); + BYTE* const dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize)); size_t const checkBufSize = CNBufferSize; - BYTE* checkBuf = (BYTE*)malloc(checkBufSize); + BYTE* const checkBuf = (BYTE*)malloc(checkBufSize); int enableFallback; EMF_testCase externalMatchState; From 49cd2e8e7980b12919f9d4cba0820d3ac49ae745 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 28 Dec 2022 10:52:11 -0800 Subject: [PATCH 59/60] Refactor maxNbSeq calculation into a helper function --- lib/compress/zstd_compress.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b7f23482e55..9ded251295f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1513,6 +1513,13 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; } +/* Helper function for calculating memory requirements. + * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */ +static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useExternalMatchFinder) { + U32 const divider = (minMatch==3 || useExternalMatchFinder) ? 3 : 4; + return blockSize / divider; +} + static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_compressionParameters* cParams, const ldmParams_t* ldmParams, @@ -1525,8 +1532,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (cParams->minMatch==3 || useExternalMatchFinder) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder); size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); @@ -1921,8 +1927,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params->cParams.minMatch==3 || params->useExternalMatchFinder) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useExternalMatchFinder); size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 : 0; From 241f2a77530965a9116782c896e9ad9ab15d0da3 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 28 Dec 2022 11:11:27 -0800 Subject: [PATCH 60/60] Fix copyright --- contrib/externalMatchfinder/Makefile | 2 +- contrib/externalMatchfinder/main.c | 2 +- contrib/externalMatchfinder/matchfinder.c | 2 +- contrib/externalMatchfinder/matchfinder.h | 2 +- tests/external_matchfinder.c | 2 +- tests/external_matchfinder.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/contrib/externalMatchfinder/Makefile b/contrib/externalMatchfinder/Makefile index fee06b1c5a1..2baa558cb56 100644 --- a/contrib/externalMatchfinder/Makefile +++ b/contrib/externalMatchfinder/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Yann Collet, Meta Platforms, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index db3cf8a9f39..6971a46c7e2 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 287b721d374..f119193ef1d 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index c88ae46f35c..f8ba1c96531 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 6b7d55e6701..8ae76d519ef 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h index c24c03b7619..041f73e4d2a 100644 --- a/tests/external_matchfinder.h +++ b/tests/external_matchfinder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the