From c48b932eae2949a13a181c2c334f537a1ab3a564 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 9 Nov 2022 20:05:03 -0500 Subject: [PATCH 01/34] First building commit with sample matchfinder --- contrib/externalMatchfinder/Makefile | 40 ++++++++++++++ contrib/externalMatchfinder/main.c | 64 +++++++++++++++++++++++ contrib/externalMatchfinder/matchfinder.c | 62 ++++++++++++++++++++++ contrib/externalMatchfinder/matchfinder.h | 12 +++++ lib/compress/zstd_compress.c | 42 ++++++++++++--- lib/compress/zstd_compress_internal.h | 15 ++++++ lib/zstd.h | 18 +++++++ 7 files changed, 246 insertions(+), 7 deletions(-) create mode 100644 contrib/externalMatchfinder/Makefile create mode 100644 contrib/externalMatchfinder/main.c create mode 100644 contrib/externalMatchfinder/matchfinder.c create mode 100644 contrib/externalMatchfinder/matchfinder.h diff --git a/contrib/externalMatchfinder/Makefile b/contrib/externalMatchfinder/Makefile new file mode 100644 index 00000000000..46a5fc2dc08 --- /dev/null +++ b/contrib/externalMatchfinder/Makefile @@ -0,0 +1,40 @@ +# ################################################################ +# Copyright (c) 2018-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +PROGDIR = ../../programs +LIBDIR = ../../lib + +LIBZSTD = $(LIBDIR)/libzstd.a + +CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/compress -I$(LIBDIR)/common + +CFLAGS ?= -O3 +CFLAGS += -std=gnu99 +DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum \ + -Wstrict-prototypes -Wundef -Wpointer-arith \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) + +default: externalMatchfinder + +all: externalMatchfinder + +externalMatchfinder: matchfinder.c main.c $(LIBZSTD) + $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ + +.PHONY: $(LIBZSTD) +$(LIBZSTD): + $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)" + +clean: + $(RM) *.o + $(MAKE) -C $(LIBDIR) clean > /dev/null + $(RM) externalMatchfinder diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c new file mode 100644 index 00000000000..6ffe282979b --- /dev/null +++ b/contrib/externalMatchfinder/main.c @@ -0,0 +1,64 @@ +#include +#include +#include + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#include "matchfinder.h" // simpleExternalMatchfinder + +const size_t ZSTD_LEVEL = 1; + +int main(int argc, char *argv[]) { + ZSTD_CCtx* zc = ZSTD_createCCtx(); + + // Here is the crucial bit of code! + ZSTD_registerExternalMatchfinder(zc, simpleExternalMatchfinder); + + if (argc != 2) { + printf("Usage: exampleMatchfinder \n"); + return 1; + } + + FILE *f = fopen(argv[1], "rb"); + fseek(f, 0, SEEK_END); + long srcSize = ftell(f); + fseek(f, 0, SEEK_SET); + + char *src = malloc(srcSize + 1); + fread(src, srcSize, 1, f); + fclose(f); + + size_t dstSize = ZSTD_compressBound(srcSize); + char *dst = malloc(dstSize); + + size_t cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); + + if (ZSTD_isError(cSize)) { + printf("ERROR: %lu\n", cSize); + return 1; + } + + char *val = malloc(srcSize); + size_t res = ZSTD_decompress(val, srcSize, dst, cSize); + + if (ZSTD_isError(res)) { + printf("ERROR: %lu\n", cSize); + return 1; + } + + if (memcmp(src, val, srcSize) == 0) { + printf("Compression and decompression were successful!\n"); + printf("Original size: %lu\n", srcSize); + printf("Compressed size: %lu\n", cSize); + return 0; + } else { + printf("ERROR: input and validation buffers don't match!\n"); + for (int i = 0; i < srcSize; i++) { + if (src[i] != val[i]) { + printf("First bad index: %d\n", i); + break; + } + } + return 1; + } +} diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c new file mode 100644 index 00000000000..a67cdad482a --- /dev/null +++ b/contrib/externalMatchfinder/matchfinder.c @@ -0,0 +1,62 @@ +#include "zstd_compress_internal.h" +#include "matchfinder.h" +#include + +#define HSIZE 1024 +static U32 const HLOG = 10; +static U32 const MLS = 4; +static U32 const BADIDX = (1 << 31); + +size_t simpleExternalMatchfinder( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, size_t historySize +) { + (void)(externalMatchState); + (void)(historySize); + (void)(outSeqsCapacity); // @nocommit return an error + + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + const BYTE* anchor = istart; + + size_t seqCount = 0; + + U32 hashTable[HSIZE]; + for (int i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } + + while (ip + 4 < iend) { + size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); + U32 const matchIndex = hashTable[hash]; + hashTable[hash] = ip - istart; + + if (matchIndex != BADIDX) { + const BYTE* const match = istart + matchIndex; + size_t const matchLen = ZSTD_count(ip, match, iend); + if (matchLen >= ZSTD_MINMATCH_MIN) { + U32 const litLen = ip - anchor; + U32 const offset = ip - match; + ZSTD_Sequence const seq = { + offset, litLen, matchLen, 0 + }; + outSeqs[seqCount++] = seq; + ip += matchLen; + anchor = ip; + continue; + } + } + + ip++; + } + + { + ZSTD_Sequence const finalSeq = { + 0, iend - anchor, 0, 0 + }; + outSeqs[seqCount] = finalSeq; + } + + return seqCount; +} diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h new file mode 100644 index 00000000000..ad2c1b2b6e8 --- /dev/null +++ b/contrib/externalMatchfinder/matchfinder.h @@ -0,0 +1,12 @@ +#ifndef MATCHFINDER_H +#define MATCHFINDER_H + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +size_t simpleExternalMatchfinder( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, size_t historySize +); + +#endif diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index adf1f6e7afc..48eabf229f9 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -28,6 +28,7 @@ #include "zstd_ldm.h" #include "zstd_compress_superblock.h" #include "../common/bits.h" /* ZSTD_highbit32 */ +#include /* *************************************************************** * Tuning parameters @@ -2858,6 +2859,14 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; +// @nocommit External matchfinder, need to move this at some point +ZSTD_Sequence globalSeqs[100000]; // @nocommit + +void ZSTD_registerExternalMatchfinder(ZSTD_CCtx* zc, ZSTD_externalMatchfinder_F* externalBlockMatchfinder) { + // @nocommit Check on how to make this sticky + zc->externalBlockMatchfinder = externalBlockMatchfinder; +} + static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; @@ -2928,6 +2937,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); + } else if (1 /* @nocommit: change to a cparam */) { + size_t numSeqsFound = (zc->externalBlockMatchfinder)( + NULL, globalSeqs, 100000, src, srcSize, 0 + ); + ZSTD_sequencePosition seqPos = {0,0,0}; + lastLLSize = 0; // @nocommit + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, globalSeqs, numSeqsFound, src, srcSize + ); } else { /* not long range mode */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, @@ -3819,6 +3837,22 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, return cSize; } +// @nocommit +// static size_t ZSTD_buildSeqStoreUsingExternalMatchfinder( +// ZSTD_CCtx* zc, const void* src, size_t srcSize +// ) { +// if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { +// if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { +// ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); +// } else { +// ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); +// } +// return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ +// } +// ZSTD_resetSeqStore(&(zc->seqStore)); +// return 0; +// } + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, @@ -5882,12 +5916,6 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, } } -typedef struct { - U32 idx; /* Index in array of ZSTD_Sequence */ - U32 posInSequence; /* Position within sequence at idx */ - size_t posInSrc; /* Number of bytes given by sequences provided so far */ -} ZSTD_sequencePosition; - /* ZSTD_validateSequence() : * @offCode : is presumed to follow format required by ZSTD_storeSeq() * @returns a ZSTD error code if sequence is not valid @@ -5928,7 +5956,7 @@ static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. */ -static size_t +size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index baa726f7dff..0c922528982 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -149,6 +149,18 @@ typedef struct { size_t capacity; /* The capacity starting from `seq` pointer */ } rawSeqStore_t; +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); + UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; typedef struct { @@ -439,6 +451,9 @@ struct ZSTD_CCtx_s { /* Workspace for block splitter */ ZSTD_blockSplitCtx blockSplitCtx; + + /* External block matchfinder */ + ZSTD_externalMatchfinder_F* externalBlockMatchfinder; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; diff --git a/lib/zstd.h b/lib/zstd.h index 1867efc93ef..3b3b7293571 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1474,6 +1474,24 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize); +typedef size_t ZSTD_externalMatchfinder_F ( + void* externalMatchState, // TODO make this an externalMatchState type + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + // outSeqsCapacity >= blockSize / MINMATCH + const void* src, size_t srcSize, size_t historySize + // srcSize - historySize <= 128 KB + // historySize < srcSize ; any size +); + +// @nocommit document this +ZSTDLIB_STATIC_API void +ZSTD_registerExternalMatchfinder(ZSTD_CCtx* cctx, ZSTD_externalMatchfinder_F* externalBlockMatchfinder); + +// @nocommit +size_t DONT_COMMIT_simpleExternalMatchfinder( + void* matchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, size_t historySize +); /*! ZSTD_writeSkippableFrame() : * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. From 006eecb61e0d4ec4def017d426bc46b1dbfdbfe3 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 18:54:17 -0700 Subject: [PATCH 02/34] Set up ZSTD_externalMatchCtx struct --- contrib/externalMatchfinder/main.c | 14 +++++-- contrib/externalMatchfinder/matchfinder.c | 6 ++- contrib/externalMatchfinder/matchfinder.h | 4 +- lib/compress/zstd_compress.c | 46 +++++++++++------------ lib/compress/zstd_compress_internal.h | 12 +++++- lib/zstd.h | 24 ++++++++---- 6 files changed, 68 insertions(+), 38 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 6ffe282979b..2d6c547dbd3 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -4,7 +4,8 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -#include "matchfinder.h" // simpleExternalMatchfinder +#include "zstd_errors.h" +#include "matchfinder.h" // simpleExternalMatchFinder, simpleExternalMatchStateDestructor const size_t ZSTD_LEVEL = 1; @@ -12,7 +13,12 @@ int main(int argc, char *argv[]) { ZSTD_CCtx* zc = ZSTD_createCCtx(); // Here is the crucial bit of code! - ZSTD_registerExternalMatchfinder(zc, simpleExternalMatchfinder); + ZSTD_registerExternalMatchfinder( + zc, + NULL, + simpleExternalMatchFinder, + simpleExternalMatchStateDestructor + ); if (argc != 2) { printf("Usage: exampleMatchfinder \n"); @@ -34,7 +40,7 @@ int main(int argc, char *argv[]) { size_t cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); if (ZSTD_isError(cSize)) { - printf("ERROR: %lu\n", cSize); + printf("ERROR: %s\n", ZSTD_getErrorString(cSize)); return 1; } @@ -42,7 +48,7 @@ int main(int argc, char *argv[]) { size_t res = ZSTD_decompress(val, srcSize, dst, cSize); if (ZSTD_isError(res)) { - printf("ERROR: %lu\n", cSize); + printf("ERROR: %s\n", ZSTD_getErrorString(res)); return 1; } diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index a67cdad482a..b8237f8d660 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -7,7 +7,7 @@ static U32 const HLOG = 10; static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); -size_t simpleExternalMatchfinder( +size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, size_t historySize ) { @@ -60,3 +60,7 @@ size_t simpleExternalMatchfinder( return seqCount; } + +void simpleExternalMatchStateDestructor(void* externalMatchState) { + (void)externalMatchState; +} diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index ad2c1b2b6e8..74041e25265 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -4,9 +4,11 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -size_t simpleExternalMatchfinder( +size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, size_t historySize ); +void simpleExternalMatchStateDestructor(void* matchState); + #endif diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 48eabf229f9..71da3553cd3 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -28,6 +28,8 @@ #include "zstd_ldm.h" #include "zstd_compress_superblock.h" #include "../common/bits.h" /* ZSTD_highbit32 */ +#define ZSTD_DEPS_NEED_MALLOC +#include "../common/zstd_deps.h" /* ZSTD_malloc */ #include /* *************************************************************** @@ -2860,11 +2862,23 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; // @nocommit External matchfinder, need to move this at some point -ZSTD_Sequence globalSeqs[100000]; // @nocommit - -void ZSTD_registerExternalMatchfinder(ZSTD_CCtx* zc, ZSTD_externalMatchfinder_F* externalBlockMatchfinder) { - // @nocommit Check on how to make this sticky - zc->externalBlockMatchfinder = externalBlockMatchfinder; +void ZSTD_registerExternalMatchfinder( + ZSTD_CCtx* zc, void* mState, + ZSTD_externalMatchFinder_F* mFinder, + ZSTD_externalMatchStateDestructor_F* mStateDestructor +) { + // @nocommit Call clearExternalMatchContext() + size_t const seqBufferSize = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence); + void* seqBuffer = ZSTD_malloc(seqBufferSize); + + ZSTD_externalMatchCtx emctx = { + mState, + mFinder, + mStateDestructor, + seqBuffer, + seqBufferSize + }; + zc->externalMatchCtx = emctx; } static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -2938,13 +2952,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else if (1 /* @nocommit: change to a cparam */) { - size_t numSeqsFound = (zc->externalBlockMatchfinder)( - NULL, globalSeqs, 100000, src, srcSize, 0 + size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( + NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferSize, src, srcSize, 0 ); ZSTD_sequencePosition seqPos = {0,0,0}; lastLLSize = 0; // @nocommit ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, globalSeqs, numSeqsFound, src, srcSize + zc, &seqPos, zc->externalMatchCtx.seqBuffer, numSeqsFound, src, srcSize ); } else { /* not long range mode */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, @@ -3837,22 +3851,6 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, return cSize; } -// @nocommit -// static size_t ZSTD_buildSeqStoreUsingExternalMatchfinder( -// ZSTD_CCtx* zc, const void* src, size_t srcSize -// ) { -// if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { -// if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { -// ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); -// } else { -// ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); -// } -// return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ -// } -// ZSTD_resetSeqStore(&(zc->seqStore)); -// return 0; -// } - static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 0c922528982..1fab58b3685 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -382,6 +382,16 @@ typedef struct { ZSTD_entropyCTablesMetadata_t entropyMetadata; } ZSTD_blockSplitCtx; +/* Block-level sequence compression API */ +// @nocommit improve docs +typedef struct { + void* mState; + ZSTD_externalMatchFinder_F* mFinder; + ZSTD_externalMatchStateDestructor_F* mStateDestructor; + void* seqBuffer; // @nocommit change from void* to ZSTD_Sequence* + size_t seqBufferSize; +} ZSTD_externalMatchCtx; + struct ZSTD_CCtx_s { ZSTD_compressionStage_e stage; int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ @@ -453,7 +463,7 @@ struct ZSTD_CCtx_s { ZSTD_blockSplitCtx blockSplitCtx; /* External block matchfinder */ - ZSTD_externalMatchfinder_F* externalBlockMatchfinder; + ZSTD_externalMatchCtx externalMatchCtx; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; diff --git a/lib/zstd.h b/lib/zstd.h index 3b3b7293571..418e7d250f7 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1474,7 +1474,11 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize); -typedef size_t ZSTD_externalMatchfinder_F ( +/* Block-level sequence compression API */ +// @nocommit improve docs + +// @nocommit move bounds comments into docs +typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, // TODO make this an externalMatchState type ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH @@ -1483,16 +1487,21 @@ typedef size_t ZSTD_externalMatchfinder_F ( // historySize < srcSize ; any size ); +typedef void ZSTD_externalMatchStateDestructor_F ( + void* externalMatchState +); + // @nocommit document this ZSTDLIB_STATIC_API void -ZSTD_registerExternalMatchfinder(ZSTD_CCtx* cctx, ZSTD_externalMatchfinder_F* externalBlockMatchfinder); - -// @nocommit -size_t DONT_COMMIT_simpleExternalMatchfinder( - void* matchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, size_t historySize +ZSTD_registerExternalMatchfinder( + ZSTD_CCtx* cctx, + void* externalMatchState, + ZSTD_externalMatchFinder_F* externalMatchFinder, + ZSTD_externalMatchStateDestructor_F* externalMatchStateDestructor ); +/****************************************/ + /*! ZSTD_writeSkippableFrame() : * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. * @@ -2658,6 +2667,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ #if defined (__cplusplus) From 297de1c2a49db0740db0dee57a2c0f0ec445c343 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 19:07:12 -0700 Subject: [PATCH 03/34] move seqBuffer to ZSTD_Sequence* --- lib/compress/zstd_compress.c | 8 ++++---- lib/compress/zstd_compress_internal.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 71da3553cd3..bc7e25d4273 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2868,15 +2868,15 @@ void ZSTD_registerExternalMatchfinder( ZSTD_externalMatchStateDestructor_F* mStateDestructor ) { // @nocommit Call clearExternalMatchContext() - size_t const seqBufferSize = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence); - void* seqBuffer = ZSTD_malloc(seqBufferSize); + size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + ZSTD_Sequence* seqBuffer = (ZSTD_Sequence*)ZSTD_malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); ZSTD_externalMatchCtx emctx = { mState, mFinder, mStateDestructor, seqBuffer, - seqBufferSize + seqBufferCapacity }; zc->externalMatchCtx = emctx; } @@ -2953,7 +2953,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(ldmSeqStore.pos == ldmSeqStore.size); } else if (1 /* @nocommit: change to a cparam */) { size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( - NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferSize, src, srcSize, 0 + NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, 0 ); ZSTD_sequencePosition seqPos = {0,0,0}; lastLLSize = 0; // @nocommit diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 1fab58b3685..e452c910c1c 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -388,8 +388,8 @@ typedef struct { void* mState; ZSTD_externalMatchFinder_F* mFinder; ZSTD_externalMatchStateDestructor_F* mStateDestructor; - void* seqBuffer; // @nocommit change from void* to ZSTD_Sequence* - size_t seqBufferSize; + ZSTD_Sequence* seqBuffer; + size_t seqBufferCapacity; } ZSTD_externalMatchCtx; struct ZSTD_CCtx_s { From 3d3cc4eaef94975b9dc4525739c115aa385b931a Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 19:12:11 -0700 Subject: [PATCH 04/34] support non-contiguous dictionary --- contrib/externalMatchfinder/matchfinder.c | 5 +++-- contrib/externalMatchfinder/matchfinder.h | 2 +- lib/compress/zstd_compress.c | 3 ++- lib/zstd.h | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index b8237f8d660..c974791b69f 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -9,10 +9,11 @@ static U32 const BADIDX = (1 << 31); size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, size_t historySize + const void* src, size_t srcSize, const void* dict, size_t dictSize ) { (void)(externalMatchState); - (void)(historySize); + (void)(dict); + (void)(dictSize); (void)(outSeqsCapacity); // @nocommit return an error const BYTE* const istart = (const BYTE*)src; diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 74041e25265..5ad0909c6b2 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -6,7 +6,7 @@ size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, size_t historySize + const void* src, size_t srcSize, const void* dict, size_t dictSize ); void simpleExternalMatchStateDestructor(void* matchState); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index bc7e25d4273..a20dcaeba28 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2952,8 +2952,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else if (1 /* @nocommit: change to a cparam */) { + // @nocommit document lack of dictionary support in function call size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( - NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, 0 + NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 ); ZSTD_sequencePosition seqPos = {0,0,0}; lastLLSize = 0; // @nocommit diff --git a/lib/zstd.h b/lib/zstd.h index 418e7d250f7..c5d0982d2f1 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1482,7 +1482,8 @@ typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, // TODO make this an externalMatchState type ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH - const void* src, size_t srcSize, size_t historySize + const void* src, size_t srcSize, + const void* dict, size_t dictSize // srcSize - historySize <= 128 KB // historySize < srcSize ; any size ); From 46d3dc8e7e3598688aba2afdcdc11c9f84568a1a Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 19:13:32 -0700 Subject: [PATCH 05/34] clean up parens --- contrib/externalMatchfinder/matchfinder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index c974791b69f..aa20aacbe73 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -11,10 +11,10 @@ size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ) { - (void)(externalMatchState); - (void)(dict); - (void)(dictSize); - (void)(outSeqsCapacity); // @nocommit return an error + (void)externalMatchState; + (void)dict; + (void)dictSize; + (void)outSeqsCapacity; // @nocommit return an error const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; From 6ed7d055f7ff7475a03c44e7eda4e7ebbaa7233e Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 20 Nov 2022 19:53:32 -0700 Subject: [PATCH 06/34] add clearExternalMatchfinder, handle allocation errors --- contrib/externalMatchfinder/main.c | 17 +++++++++++++---- lib/compress/zstd_compress.c | 24 ++++++++++++++++++++++-- lib/zstd.h | 12 +++++++++--- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 2d6c547dbd3..0133bdcae66 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -10,18 +10,25 @@ const size_t ZSTD_LEVEL = 1; int main(int argc, char *argv[]) { + size_t res; + + if (argc != 2) { + printf("Usage: exampleMatchfinder \n"); + return 1; + } + ZSTD_CCtx* zc = ZSTD_createCCtx(); // Here is the crucial bit of code! - ZSTD_registerExternalMatchfinder( + res = ZSTD_registerExternalMatchFinder( zc, NULL, simpleExternalMatchFinder, simpleExternalMatchStateDestructor ); - if (argc != 2) { - printf("Usage: exampleMatchfinder \n"); + if (ZSTD_isError(res)) { + printf("ERROR: %s\n", ZSTD_getErrorString(res)); return 1; } @@ -45,7 +52,9 @@ int main(int argc, char *argv[]) { } char *val = malloc(srcSize); - size_t res = ZSTD_decompress(val, srcSize, dst, cSize); + res = ZSTD_decompress(val, srcSize, dst, cSize); + + ZSTD_freeCCtx(zc); if (ZSTD_isError(res)) { printf("ERROR: %s\n", ZSTD_getErrorString(res)); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a20dcaeba28..c4c360b2f61 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -170,6 +170,7 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) assert(cctx != NULL); assert(cctx->staticSize == 0); ZSTD_clearAllDicts(cctx); + ZSTD_clearExternalMatchFinder(cctx); #ifdef ZSTD_MULTITHREAD ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; #endif @@ -2862,14 +2863,18 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; // @nocommit External matchfinder, need to move this at some point -void ZSTD_registerExternalMatchfinder( +size_t ZSTD_registerExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder, ZSTD_externalMatchStateDestructor_F* mStateDestructor ) { - // @nocommit Call clearExternalMatchContext() + ZSTD_clearExternalMatchFinder(zc); + size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); ZSTD_Sequence* seqBuffer = (ZSTD_Sequence*)ZSTD_malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); + if (seqBuffer == NULL) { + return ZSTD_error_memory_allocation; + } ZSTD_externalMatchCtx emctx = { mState, @@ -2879,6 +2884,21 @@ void ZSTD_registerExternalMatchfinder( seqBufferCapacity }; zc->externalMatchCtx = emctx; + + return ZSTD_error_no_error; +} + +void ZSTD_clearExternalMatchFinder( + ZSTD_CCtx* zc +) { + ZSTD_externalMatchCtx const emctx = zc->externalMatchCtx; + if (emctx.mFinder != NULL) { + if (emctx.mStateDestructor != NULL) { + (emctx.mStateDestructor)(emctx.mState); + } + ZSTD_free(emctx.seqBuffer); + ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx)); + } } static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) diff --git a/lib/zstd.h b/lib/zstd.h index c5d0982d2f1..d866be5efed 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1492,15 +1492,21 @@ typedef void ZSTD_externalMatchStateDestructor_F ( void* externalMatchState ); -// @nocommit document this -ZSTDLIB_STATIC_API void -ZSTD_registerExternalMatchfinder( +ZSTDLIB_STATIC_API size_t +ZSTD_registerExternalMatchFinder( ZSTD_CCtx* cctx, void* externalMatchState, ZSTD_externalMatchFinder_F* externalMatchFinder, ZSTD_externalMatchStateDestructor_F* externalMatchStateDestructor ); +/* Note: calls the previously-registered ZSTD_externalMatchStateDestructor_F* + * if it is non-NULL. */ +ZSTDLIB_STATIC_API void +ZSTD_clearExternalMatchFinder( + ZSTD_CCtx* cctx +); + /****************************************/ /*! ZSTD_writeSkippableFrame() : From d4ecd79938d0c4ec3a130d7d498132f8e535e37f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 22 Nov 2022 21:48:56 -0500 Subject: [PATCH 07/34] Add useExternalMatchfinder cParam --- contrib/externalMatchfinder/main.c | 7 +++++++ lib/compress/zstd_compress.c | 17 ++++++++++++++++- lib/compress/zstd_compress_internal.h | 3 +++ lib/zstd.h | 7 ++++++- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 0133bdcae66..c831ee8da34 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -32,6 +32,13 @@ int main(int argc, char *argv[]) { return 1; } + res = ZSTD_CCtx_setParameter(zc, ZSTD_c_useExternalMatchfinder, 1); + + if (ZSTD_isError(res)) { + printf("ERROR: %s\n", ZSTD_getErrorString(res)); + return 1; + } + FILE *f = fopen(argv[1], "rb"); fseek(f, 0, SEEK_END); long srcSize = ftell(f); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c4c360b2f61..08e8446dd72 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -585,6 +585,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; + case ZSTD_c_useExternalMatchfinder: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -650,6 +655,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: + case ZSTD_c_useExternalMatchfinder: default: return 0; } @@ -706,6 +712,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: + case ZSTD_c_useExternalMatchfinder: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -937,6 +944,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; return CCtxParams->prefetchCDictTables; + case ZSTD_c_useExternalMatchfinder: + BOUNDCHECK(ZSTD_c_useExternalMatchfinder, value); + CCtxParams->useExternalMatchfinder = value; + return CCtxParams->useExternalMatchfinder; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -1072,6 +1084,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_prefetchCDictTables: *value = (int)CCtxParams->prefetchCDictTables; break; + case ZSTD_c_useExternalMatchfinder: + *value = CCtxParams->useExternalMatchfinder; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -2971,7 +2986,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (1 /* @nocommit: change to a cparam */) { + } else if (zc->appliedParams.useExternalMatchfinder) { // @nocommit document lack of dictionary support in function call size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index e452c910c1c..0a67d056b98 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -351,6 +351,9 @@ struct ZSTD_CCtx_params_s { /* Controls prefetching in some dictMatchState matchfinders */ ZSTD_paramSwitch_e prefetchCDictTables; + + /* Controls block-level sequence compression API */ + int useExternalMatchfinder; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) diff --git a/lib/zstd.h b/lib/zstd.h index d866be5efed..5822704a9a2 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -446,6 +446,7 @@ typedef enum { * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables + * ZSTD_c_useExternalMatchfinder * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -465,7 +466,8 @@ typedef enum { ZSTD_c_experimentalParam13=1010, ZSTD_c_experimentalParam14=1011, ZSTD_c_experimentalParam15=1012, - ZSTD_c_experimentalParam16=1013 + ZSTD_c_experimentalParam16=1013, + ZSTD_c_experimentalParam17=1014 } ZSTD_cParameter; typedef struct { @@ -2044,6 +2046,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 +// @nocommit document +#define ZSTD_c_useExternalMatchfinder ZSTD_c_experimentalParam17 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. From 06abdf828876fae7215907263a0d464f54861a07 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 10:55:43 -0500 Subject: [PATCH 08/34] validate useExternalMatchfinder cParam --- contrib/externalMatchfinder/main.c | 8 ++++---- lib/compress/zstd_compress.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index c831ee8da34..eaba605b6af 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -28,14 +28,14 @@ int main(int argc, char *argv[]) { ); if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorString(res)); + printf("ERROR: %s\n", ZSTD_getErrorName(res)); return 1; } res = ZSTD_CCtx_setParameter(zc, ZSTD_c_useExternalMatchfinder, 1); if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorString(res)); + printf("ERROR: %s\n", ZSTD_getErrorName(res)); return 1; } @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { size_t cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); if (ZSTD_isError(cSize)) { - printf("ERROR: %s\n", ZSTD_getErrorString(cSize)); + printf("ERROR: %s\n", ZSTD_getErrorName(cSize)); return 1; } @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) { ZSTD_freeCCtx(zc); if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorString(res)); + printf("ERROR: %s\n", ZSTD_getErrorName(res)); return 1; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 08e8446dd72..0960e956ea6 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -678,6 +678,14 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) "MT not compatible with static alloc"); break; + case ZSTD_c_useExternalMatchfinder: + RETURN_ERROR_IF( + (value == 1) && (cctx->externalMatchCtx.mFinder == NULL), + parameter_unsupported, + "Must register an external matchfinder to set useExternalMatchfinder=1" + ); + break; + case ZSTD_c_compressionLevel: case ZSTD_c_windowLog: case ZSTD_c_hashLog: @@ -712,7 +720,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_useExternalMatchfinder: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -2970,6 +2977,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { + // @nocommit Check for LDM, fail if useExternalMatchfinder is set rawSeqStore_t ldmSeqStore = kNullRawSeqStore; ldmSeqStore.seq = zc->ldmSequences; @@ -2988,6 +2996,10 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(ldmSeqStore.pos == ldmSeqStore.size); } else if (zc->appliedParams.useExternalMatchfinder) { // @nocommit document lack of dictionary support in function call + RETURN_ERROR_IF( + zc->externalMatchCtx.mFinder == NULL, parameter_unsupported, + "useExternalMatchfinder=1 but no external matchfinder is registered!" + ); size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 ); From 875c3ae090d552a38ec914f0e8e22ec04e2fbf38 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 15:36:55 -0500 Subject: [PATCH 09/34] Disable LDM + external matchfinder --- lib/compress/zstd_compress.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0960e956ea6..761e8714378 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2930,6 +2930,16 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(srcSize <= ZSTD_BLOCKSIZE_MAX); /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + (zc->appliedParams.useExternalMatchfinder == 1) && + (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable), + parameter_unsupported, // @nocommit Make this a parameter *combination* error + "Long-distance matching with external matchfinder enabled is not currently supported." + ); + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding * additional 1. We need to revisit and change this logic to be more consistent */ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { @@ -2977,7 +2987,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { - // @nocommit Check for LDM, fail if useExternalMatchfinder is set rawSeqStore_t ldmSeqStore = kNullRawSeqStore; ldmSeqStore.seq = zc->ldmSequences; From a3580e858ff641c14c34fc9fc407b6af379a3d02 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 16:58:53 -0500 Subject: [PATCH 10/34] Check for static CCtx --- lib/compress/zstd_compress.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 761e8714378..403a7adceab 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2890,6 +2890,11 @@ size_t ZSTD_registerExternalMatchFinder( ZSTD_externalMatchFinder_F* mFinder, ZSTD_externalMatchStateDestructor_F* mStateDestructor ) { + RETURN_ERROR_IF( + zc->staticSize, parameter_unsupported, + "External matchfinder is not compatible with static alloc" + ); + ZSTD_clearExternalMatchFinder(zc); size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); From c660b00322fe36a461445c892ccb67f7b48c5e4e Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 17:18:59 -0500 Subject: [PATCH 11/34] Validate mState and mStateDestructor --- contrib/externalMatchfinder/main.c | 4 +++- lib/compress/zstd_compress.c | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index eaba605b6af..af741245957 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -19,10 +19,12 @@ int main(int argc, char *argv[]) { ZSTD_CCtx* zc = ZSTD_createCCtx(); + int simpleExternalMatchState = 0xdeadbeef; // @nocommit + // Here is the crucial bit of code! res = ZSTD_registerExternalMatchFinder( zc, - NULL, + &simpleExternalMatchState, simpleExternalMatchFinder, simpleExternalMatchStateDestructor ); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 403a7adceab..86cd50caca1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2895,6 +2895,12 @@ size_t ZSTD_registerExternalMatchFinder( "External matchfinder is not compatible with static alloc" ); + RETURN_ERROR_IF( + (mState && !mStateDestructor) || (!mState && mStateDestructor), + GENERIC, + "Cannot provide external match state without corresponding destructor, or vice versa!" + ); + ZSTD_clearExternalMatchFinder(zc); size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); From d574df84abbfa2c5560e0d5369d0e75983b4b3fd Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 23:28:35 -0500 Subject: [PATCH 12/34] Improve LDM check to cover both branches --- lib/compress/zstd_compress.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 86cd50caca1..a883d9e5ff7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2942,15 +2942,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - /* External matchfinder + LDM is technically possible, just not implemented yet. - * We need to revisit soon and implement it. */ - RETURN_ERROR_IF( - (zc->appliedParams.useExternalMatchfinder == 1) && - (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable), - parameter_unsupported, // @nocommit Make this a parameter *combination* error - "Long-distance matching with external matchfinder enabled is not currently supported." - ); - /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding * additional 1. We need to revisit and change this logic to be more consistent */ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { @@ -2989,6 +2980,24 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } if (zc->externSeqStore.pos < zc->externSeqStore.size) { assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); + + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchfinder == 1, + parameter_unsupported, // @nocommit Make this a parameter *combination* error? + "Block splitting with external matchfinder enabled is not currently supported. " + "Note: block splitting is enabled by default at high compression levels." + ); + } else { + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchfinder == 1, + parameter_unsupported, // @nocommit Make this a parameter *combination* error? + "Long-distance matching with external matchfinder enabled is not currently supported." + ); + } + /* Updates ldmSeqStore.pos */ lastLLSize = ZSTD_ldm_blockCompress(&zc->externSeqStore, @@ -3000,6 +3009,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchfinder == 1, + parameter_unsupported, // @nocommit Make this a parameter *combination* error? + "Long-distance matching with external matchfinder enabled is not currently supported." + ); + ldmSeqStore.seq = zc->ldmSequences; ldmSeqStore.capacity = zc->maxNbLdmSequences; /* Updates ldmSeqStore.size */ From bd249d4538b7cea1659e78b5ab75df63affa5519 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 23 Nov 2022 23:44:59 -0500 Subject: [PATCH 13/34] Error API with optional fallback --- contrib/externalMatchfinder/main.c | 7 ++++ contrib/externalMatchfinder/matchfinder.c | 15 ++++---- contrib/externalMatchfinder/matchfinder.h | 2 +- lib/common/error_private.c | 1 + lib/compress/zstd_compress.c | 43 +++++++++++++++++++---- lib/compress/zstd_compress_internal.h | 4 +++ lib/zstd.h | 32 +++++++++++++---- lib/zstd_errors.h | 1 + 8 files changed, 83 insertions(+), 22 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index af741245957..a504a956614 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -41,6 +41,13 @@ int main(int argc, char *argv[]) { return 1; } + res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchfinderFallback, 1); + + if (ZSTD_isError(res)) { + printf("ERROR: %s\n", ZSTD_getErrorName(res)); + return 1; + } + FILE *f = fopen(argv[1], "rb"); fseek(f, 0, SEEK_END); long srcSize = ftell(f); diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index aa20aacbe73..c88126f6800 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -7,7 +7,7 @@ static U32 const HLOG = 10; static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); -size_t simpleExternalMatchFinder( +ZSTD_externalMatchResult simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ) { @@ -52,14 +52,13 @@ size_t simpleExternalMatchFinder( ip++; } - { - ZSTD_Sequence const finalSeq = { - 0, iend - anchor, 0, 0 - }; - outSeqs[seqCount] = finalSeq; - } + ZSTD_Sequence const finalSeq = { + 0, iend - anchor, 0, 0 + }; + outSeqs[seqCount] = finalSeq; - return seqCount; + ZSTD_externalMatchResult res = {seqCount, ZSTD_emf_error_none}; + return res; } void simpleExternalMatchStateDestructor(void* externalMatchState) { diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 5ad0909c6b2..18a02efdf61 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -4,7 +4,7 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -size_t simpleExternalMatchFinder( +ZSTD_externalMatchResult simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ); diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 1b67500f3bb..f23a6cf4bca 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -50,6 +50,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(externalMatchFinder_failed): return "External matchfinder returned a non-zero error code"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a883d9e5ff7..68d441ee5a3 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -590,6 +590,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = 1; return bounds; + case ZSTD_c_enableMatchfinderFallback: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -656,6 +661,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: case ZSTD_c_useExternalMatchfinder: + case ZSTD_c_enableMatchfinderFallback: default: return 0; } @@ -720,6 +726,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: + case ZSTD_c_enableMatchfinderFallback: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -956,6 +963,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->useExternalMatchfinder = value; return CCtxParams->useExternalMatchfinder; + case ZSTD_c_enableMatchfinderFallback: + BOUNDCHECK(ZSTD_c_enableMatchfinderFallback, value); + CCtxParams->enableMatchfinderFallback = value; + return CCtxParams->enableMatchfinderFallback; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -1094,6 +1106,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_useExternalMatchfinder: *value = CCtxParams->useExternalMatchfinder; break; + case ZSTD_c_enableMatchfinderFallback: + *value = CCtxParams->enableMatchfinderFallback; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -3037,14 +3052,30 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->externalMatchCtx.mFinder == NULL, parameter_unsupported, "useExternalMatchfinder=1 but no external matchfinder is registered!" ); - size_t numSeqsFound = (zc->externalMatchCtx.mFinder)( + ZSTD_externalMatchResult matchResult = (zc->externalMatchCtx.mFinder)( NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 ); - ZSTD_sequencePosition seqPos = {0,0,0}; - lastLLSize = 0; // @nocommit - ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, numSeqsFound, src, srcSize - ); + if (matchResult.errorCode == ZSTD_emf_error_none) { + ZSTD_sequencePosition seqPos = {0,0,0}; + ms->ldmSeqStore = NULL; + lastLLSize = 0; + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, zc->externalMatchCtx.seqBuffer, matchResult.nbSeqsFound, src, srcSize + ); + } else { + if (zc->appliedParams.enableMatchfinderFallback) { + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } else { + RETURN_ERROR( + externalMatchFinder_failed, + "External matchfinder returned a non-zero error code!" + ); + } + } } else { /* not long range mode */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 0a67d056b98..fdbb1323f47 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -354,6 +354,10 @@ struct ZSTD_CCtx_params_s { /* Controls block-level sequence compression API */ int useExternalMatchfinder; + + /* Controls whether zstd will fall back to an internal matchfinder + * when the external matchfinder returns a non-zero error code. */ + int enableMatchfinderFallback; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) diff --git a/lib/zstd.h b/lib/zstd.h index 5822704a9a2..b33ae503337 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -447,6 +447,7 @@ typedef enum { * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables * ZSTD_c_useExternalMatchfinder + * ZSTD_c_enableMatchfinderFallback * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -467,7 +468,8 @@ typedef enum { ZSTD_c_experimentalParam14=1011, ZSTD_c_experimentalParam15=1012, ZSTD_c_experimentalParam16=1013, - ZSTD_c_experimentalParam17=1014 + ZSTD_c_experimentalParam17=1014, + ZSTD_c_experimentalParam18=1015 } ZSTD_cParameter; typedef struct { @@ -1479,15 +1481,29 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, /* Block-level sequence compression API */ // @nocommit improve docs +typedef enum { + /* External matchfinder successfully compressed the block. */ + ZSTD_emf_error_none = 0, + + /* External matchfinder hit an unspecified failure condition. + * Will fail the overall compression operation. */ + ZSTD_emf_error_generic= 1, +} ZSTD_externalMatchFinder_errorCode_e; + +typedef struct { + size_t nbSeqsFound; + ZSTD_externalMatchFinder_errorCode_e errorCode; +} ZSTD_externalMatchResult; + // @nocommit move bounds comments into docs -typedef size_t ZSTD_externalMatchFinder_F ( - void* externalMatchState, // TODO make this an externalMatchState type +typedef ZSTD_externalMatchResult ZSTD_externalMatchFinder_F ( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH const void* src, size_t srcSize, const void* dict, size_t dictSize - // srcSize - historySize <= 128 KB - // historySize < srcSize ; any size + // srcSize <= 128 KB + // dictSize is not bounded ); typedef void ZSTD_externalMatchStateDestructor_F ( @@ -1502,8 +1518,7 @@ ZSTD_registerExternalMatchFinder( ZSTD_externalMatchStateDestructor_F* externalMatchStateDestructor ); -/* Note: calls the previously-registered ZSTD_externalMatchStateDestructor_F* - * if it is non-NULL. */ +/* Note: calls the previously-registered destructor if it is not NULL. */ ZSTDLIB_STATIC_API void ZSTD_clearExternalMatchFinder( ZSTD_CCtx* cctx @@ -2049,6 +2064,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo // @nocommit document #define ZSTD_c_useExternalMatchfinder ZSTD_c_experimentalParam17 +// @nocommit document +#define ZSTD_c_enableMatchfinderFallback ZSTD_c_experimentalParam18 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. diff --git a/lib/zstd_errors.h b/lib/zstd_errors.h index 2ec0b0ab168..723b3c42def 100644 --- a/lib/zstd_errors.h +++ b/lib/zstd_errors.h @@ -79,6 +79,7 @@ typedef enum { ZSTD_error_seekableIO = 102, ZSTD_error_dstBuffer_wrong = 104, ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_externalMatchFinder_failed = 106, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; From 2a007871001c8a0a68fa80877b3971ef10b665ec Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sat, 26 Nov 2022 20:48:49 -0500 Subject: [PATCH 14/34] handle RLE properly for external matchfinder --- lib/compress/zstd_compress.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 68d441ee5a3..0f22338029b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3222,12 +3222,18 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { * This is just a heuristic based on the compressibility. * It may return both false positives and false negatives. */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore) +static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* appliedParams) { size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - return nbSeqs < 4 && nbLits < 10; + if (appliedParams->useExternalMatchfinder) { + /* We shouldn't make any assumptions about how an external matchfinder + * will compress an RLE block. */ + return 1; + } else { + return nbSeqs < 4 && nbLits < 10; + } } static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) @@ -4043,7 +4049,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_maybeRLE(&zc->seqStore, &zc->appliedParams) && ZSTD_isRLE((BYTE const*)src, srcSize)) { return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); @@ -6396,7 +6402,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_maybeRLE(&cctx->seqStore, &cctx->appliedParams) && ZSTD_isRLE((BYTE const*)src, srcSize)) { /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." From bb48c167f07c68b8178486d6bc0a3919166cc9dd Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sat, 26 Nov 2022 21:32:17 -0500 Subject: [PATCH 15/34] nit --- contrib/externalMatchfinder/main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index a504a956614..b19e59c008a 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -7,8 +7,6 @@ #include "zstd_errors.h" #include "matchfinder.h" // simpleExternalMatchFinder, simpleExternalMatchStateDestructor -const size_t ZSTD_LEVEL = 1; - int main(int argc, char *argv[]) { size_t res; From 06b3607362320de9176ff8ba22de75f8e8660acb Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 29 Nov 2022 17:31:41 -0500 Subject: [PATCH 16/34] Move to a CDict-like model for resource ownership --- contrib/externalMatchfinder/main.c | 17 +-- contrib/externalMatchfinder/matchfinder.c | 9 +- contrib/externalMatchfinder/matchfinder.h | 4 +- lib/common/error_private.c | 2 +- lib/compress/zstd_compress.c | 135 +++++++--------------- lib/compress/zstd_compress_internal.h | 8 +- lib/zstd.h | 44 ++----- 7 files changed, 58 insertions(+), 161 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index b19e59c008a..2b216485694 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -20,25 +20,12 @@ int main(int argc, char *argv[]) { int simpleExternalMatchState = 0xdeadbeef; // @nocommit // Here is the crucial bit of code! - res = ZSTD_registerExternalMatchFinder( + ZSTD_refExternalMatchFinder( zc, &simpleExternalMatchState, - simpleExternalMatchFinder, - simpleExternalMatchStateDestructor + simpleExternalMatchFinder ); - if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorName(res)); - return 1; - } - - res = ZSTD_CCtx_setParameter(zc, ZSTD_c_useExternalMatchfinder, 1); - - if (ZSTD_isError(res)) { - printf("ERROR: %s\n", ZSTD_getErrorName(res)); - return 1; - } - res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchfinderFallback, 1); if (ZSTD_isError(res)) { diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index c88126f6800..5962e9635d8 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -7,7 +7,7 @@ static U32 const HLOG = 10; static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); -ZSTD_externalMatchResult simpleExternalMatchFinder( +size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ) { @@ -57,10 +57,5 @@ ZSTD_externalMatchResult simpleExternalMatchFinder( }; outSeqs[seqCount] = finalSeq; - ZSTD_externalMatchResult res = {seqCount, ZSTD_emf_error_none}; - return res; -} - -void simpleExternalMatchStateDestructor(void* externalMatchState) { - (void)externalMatchState; + return seqCount; } diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 18a02efdf61..3fc62f893ab 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -4,11 +4,9 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -ZSTD_externalMatchResult simpleExternalMatchFinder( +size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize ); -void simpleExternalMatchStateDestructor(void* matchState); - #endif diff --git a/lib/common/error_private.c b/lib/common/error_private.c index f23a6cf4bca..42f7e070191 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -50,7 +50,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; - case PREFIX(externalMatchFinder_failed): return "External matchfinder returned a non-zero error code"; + case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0f22338029b..b9672e70094 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -170,7 +170,6 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) assert(cctx != NULL); assert(cctx->staticSize == 0); ZSTD_clearAllDicts(cctx); - ZSTD_clearExternalMatchFinder(cctx); #ifdef ZSTD_MULTITHREAD ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; #endif @@ -585,11 +584,6 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; - case ZSTD_c_useExternalMatchfinder: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - case ZSTD_c_enableMatchfinderFallback: bounds.lowerBound = 0; bounds.upperBound = 1; @@ -660,7 +654,6 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_useExternalMatchfinder: case ZSTD_c_enableMatchfinderFallback: default: return 0; @@ -684,14 +677,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) "MT not compatible with static alloc"); break; - case ZSTD_c_useExternalMatchfinder: - RETURN_ERROR_IF( - (value == 1) && (cctx->externalMatchCtx.mFinder == NULL), - parameter_unsupported, - "Must register an external matchfinder to set useExternalMatchfinder=1" - ); - break; - case ZSTD_c_compressionLevel: case ZSTD_c_windowLog: case ZSTD_c_hashLog: @@ -958,11 +943,6 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; return CCtxParams->prefetchCDictTables; - case ZSTD_c_useExternalMatchfinder: - BOUNDCHECK(ZSTD_c_useExternalMatchfinder, value); - CCtxParams->useExternalMatchfinder = value; - return CCtxParams->useExternalMatchfinder; - case ZSTD_c_enableMatchfinderFallback: BOUNDCHECK(ZSTD_c_enableMatchfinderFallback, value); CCtxParams->enableMatchfinderFallback = value; @@ -1103,9 +1083,6 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_prefetchCDictTables: *value = (int)CCtxParams->prefetchCDictTables; break; - case ZSTD_c_useExternalMatchfinder: - *value = CCtxParams->useExternalMatchfinder; - break; case ZSTD_c_enableMatchfinderFallback: *value = CCtxParams->enableMatchfinderFallback; break; @@ -2899,56 +2876,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; -// @nocommit External matchfinder, need to move this at some point -size_t ZSTD_registerExternalMatchFinder( - ZSTD_CCtx* zc, void* mState, - ZSTD_externalMatchFinder_F* mFinder, - ZSTD_externalMatchStateDestructor_F* mStateDestructor -) { - RETURN_ERROR_IF( - zc->staticSize, parameter_unsupported, - "External matchfinder is not compatible with static alloc" - ); - - RETURN_ERROR_IF( - (mState && !mStateDestructor) || (!mState && mStateDestructor), - GENERIC, - "Cannot provide external match state without corresponding destructor, or vice versa!" - ); - - ZSTD_clearExternalMatchFinder(zc); - - size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); - ZSTD_Sequence* seqBuffer = (ZSTD_Sequence*)ZSTD_malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); - if (seqBuffer == NULL) { - return ZSTD_error_memory_allocation; - } - - ZSTD_externalMatchCtx emctx = { - mState, - mFinder, - mStateDestructor, - seqBuffer, - seqBufferCapacity - }; - zc->externalMatchCtx = emctx; - - return ZSTD_error_no_error; -} - -void ZSTD_clearExternalMatchFinder( - ZSTD_CCtx* zc -) { - ZSTD_externalMatchCtx const emctx = zc->externalMatchCtx; - if (emctx.mFinder != NULL) { - if (emctx.mStateDestructor != NULL) { - (emctx.mStateDestructor)(emctx.mState); - } - ZSTD_free(emctx.seqBuffer); - ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx)); - } -} - static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; @@ -3000,14 +2927,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) * We need to revisit soon and implement it. */ if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder == 1, + zc->externalMatchCtx.mFinder != NULL, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Block splitting with external matchfinder enabled is not currently supported. " "Note: block splitting is enabled by default at high compression levels." ); } else { RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder == 1, + zc->externalMatchCtx.mFinder != NULL, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -3027,7 +2954,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* External matchfinder + LDM is technically possible, just not implemented yet. * We need to revisit soon and implement it. */ RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder == 1, + zc->externalMatchCtx.mFinder != NULL, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -3046,22 +2973,26 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (zc->appliedParams.useExternalMatchfinder) { - // @nocommit document lack of dictionary support in function call - RETURN_ERROR_IF( - zc->externalMatchCtx.mFinder == NULL, parameter_unsupported, - "useExternalMatchfinder=1 but no external matchfinder is registered!" + } else if (zc->externalMatchCtx.mFinder != NULL) { + assert( + zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) ); - ZSTD_externalMatchResult matchResult = (zc->externalMatchCtx.mFinder)( - NULL, zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, NULL, 0 + + size_t nbExternalSeqs = (zc->externalMatchCtx.mFinder)( + zc->externalMatchCtx.mState, + zc->externalMatchCtx.seqBuffer, + zc->externalMatchCtx.seqBufferCapacity, + src, srcSize, + NULL, 0 /* dict and dictSize, currently not supported */ ); - if (matchResult.errorCode == ZSTD_emf_error_none) { + + if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { ZSTD_sequencePosition seqPos = {0,0,0}; - ms->ldmSeqStore = NULL; - lastLLSize = 0; ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, matchResult.nbSeqsFound, src, srcSize + zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbExternalSeqs, src, srcSize ); + ms->ldmSeqStore = NULL; + lastLLSize = 0; } else { if (zc->appliedParams.enableMatchfinderFallback) { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, @@ -3072,11 +3003,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } else { RETURN_ERROR( externalMatchFinder_failed, - "External matchfinder returned a non-zero error code!" + "External matchfinder returned an error code!" ); } } - } else { /* not long range mode */ + } else { /* not long range mode and no external matchfinder */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); @@ -3222,12 +3153,12 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { * This is just a heuristic based on the compressibility. * It may return both false positives and false negatives. */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* appliedParams) +static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_externalMatchCtx const* externalMatchCtx) { size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - if (appliedParams->useExternalMatchfinder) { + if (externalMatchCtx->mFinder != NULL) { /* We shouldn't make any assumptions about how an external matchfinder * will compress an RLE block. */ return 1; @@ -4049,7 +3980,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore, &zc->appliedParams) && + ZSTD_maybeRLE(&zc->seqStore, &zc->externalMatchCtx) && ZSTD_isRLE((BYTE const*)src, srcSize)) { return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); @@ -6402,7 +6333,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore, &cctx->appliedParams) && + ZSTD_maybeRLE(&cctx->seqStore, &cctx->externalMatchCtx) && ZSTD_isRLE((BYTE const*)src, srcSize)) { /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." @@ -6675,3 +6606,21 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); } + +void ZSTD_refExternalMatchFinder( + ZSTD_CCtx* zc, void* mState, + ZSTD_externalMatchFinder_F* mFinder +) { + size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + ZSTD_Sequence* const seqBuffer = malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); + + ZSTD_externalMatchCtx emctx = { + mState, + mFinder, + + /* @nocommit seqBuffer should move into the cwksp */ + seqBuffer, + seqBufferCapacity + }; + zc->externalMatchCtx = emctx; +} diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index fdbb1323f47..fc266996a54 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -352,11 +352,8 @@ struct ZSTD_CCtx_params_s { /* Controls prefetching in some dictMatchState matchfinders */ ZSTD_paramSwitch_e prefetchCDictTables; - /* Controls block-level sequence compression API */ - int useExternalMatchfinder; - /* Controls whether zstd will fall back to an internal matchfinder - * when the external matchfinder returns a non-zero error code. */ + * when the external matchfinder returns an error code. */ int enableMatchfinderFallback; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ @@ -394,7 +391,6 @@ typedef struct { typedef struct { void* mState; ZSTD_externalMatchFinder_F* mFinder; - ZSTD_externalMatchStateDestructor_F* mStateDestructor; ZSTD_Sequence* seqBuffer; size_t seqBufferCapacity; } ZSTD_externalMatchCtx; @@ -469,7 +465,7 @@ struct ZSTD_CCtx_s { /* Workspace for block splitter */ ZSTD_blockSplitCtx blockSplitCtx; - /* External block matchfinder */ + /* Workspace for external matchfinder */ ZSTD_externalMatchCtx externalMatchCtx; }; diff --git a/lib/zstd.h b/lib/zstd.h index b33ae503337..1a275bbd6a2 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -446,7 +446,6 @@ typedef enum { * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables - * ZSTD_c_useExternalMatchfinder * ZSTD_c_enableMatchfinderFallback * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; @@ -468,8 +467,7 @@ typedef enum { ZSTD_c_experimentalParam14=1011, ZSTD_c_experimentalParam15=1012, ZSTD_c_experimentalParam16=1013, - ZSTD_c_experimentalParam17=1014, - ZSTD_c_experimentalParam18=1015 + ZSTD_c_experimentalParam17=1014 } ZSTD_cParameter; typedef struct { @@ -532,7 +530,7 @@ typedef enum { * They will be used to compress next frame. * Resetting session never fails. * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. + * This removes any reference to any dictionary too. @nocommit mention external matchfinder * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) * - Both : similar to resetting the session, followed by resetting parameters. @@ -1481,22 +1479,10 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, /* Block-level sequence compression API */ // @nocommit improve docs -typedef enum { - /* External matchfinder successfully compressed the block. */ - ZSTD_emf_error_none = 0, - - /* External matchfinder hit an unspecified failure condition. - * Will fail the overall compression operation. */ - ZSTD_emf_error_generic= 1, -} ZSTD_externalMatchFinder_errorCode_e; - -typedef struct { - size_t nbSeqsFound; - ZSTD_externalMatchFinder_errorCode_e errorCode; -} ZSTD_externalMatchResult; +#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) // @nocommit move bounds comments into docs -typedef ZSTD_externalMatchResult ZSTD_externalMatchFinder_F ( +typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH @@ -1506,22 +1492,11 @@ typedef ZSTD_externalMatchResult ZSTD_externalMatchFinder_F ( // dictSize is not bounded ); -typedef void ZSTD_externalMatchStateDestructor_F ( - void* externalMatchState -); - -ZSTDLIB_STATIC_API size_t -ZSTD_registerExternalMatchFinder( +ZSTDLIB_STATIC_API void +ZSTD_refExternalMatchFinder( ZSTD_CCtx* cctx, void* externalMatchState, - ZSTD_externalMatchFinder_F* externalMatchFinder, - ZSTD_externalMatchStateDestructor_F* externalMatchStateDestructor -); - -/* Note: calls the previously-registered destructor if it is not NULL. */ -ZSTDLIB_STATIC_API void -ZSTD_clearExternalMatchFinder( - ZSTD_CCtx* cctx + ZSTD_externalMatchFinder_F* externalMatchFinder ); /****************************************/ @@ -2062,10 +2037,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 // @nocommit document -#define ZSTD_c_useExternalMatchfinder ZSTD_c_experimentalParam17 - -// @nocommit document -#define ZSTD_c_enableMatchfinderFallback ZSTD_c_experimentalParam18 +#define ZSTD_c_enableMatchfinderFallback ZSTD_c_experimentalParam17 /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, From 182dbf8f62f74d03042220dfac8211339fa42617 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 30 Nov 2022 17:07:37 -0500 Subject: [PATCH 17/34] Add hidden useExternalMatchfinder bool to CCtx_params_s --- lib/compress/zstd_compress.c | 20 ++++++++++++-------- lib/compress/zstd_compress_internal.h | 7 ++++++- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b9672e70094..e1813c087f9 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2927,14 +2927,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) * We need to revisit soon and implement it. */ if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { RETURN_ERROR_IF( - zc->externalMatchCtx.mFinder != NULL, + zc->appliedParams.useExternalMatchfinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Block splitting with external matchfinder enabled is not currently supported. " "Note: block splitting is enabled by default at high compression levels." ); } else { RETURN_ERROR_IF( - zc->externalMatchCtx.mFinder != NULL, + zc->appliedParams.useExternalMatchfinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -2954,7 +2954,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* External matchfinder + LDM is technically possible, just not implemented yet. * We need to revisit soon and implement it. */ RETURN_ERROR_IF( - zc->externalMatchCtx.mFinder != NULL, + zc->appliedParams.useExternalMatchfinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -2973,10 +2973,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (zc->externalMatchCtx.mFinder != NULL) { + } else if (zc->appliedParams.useExternalMatchfinder) { assert( zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) ); + assert(zc->externalMatchCtx.mFinder != NULL); size_t nbExternalSeqs = (zc->externalMatchCtx.mFinder)( zc->externalMatchCtx.mState, @@ -3008,6 +3009,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } } } else { /* not long range mode and no external matchfinder */ + assert(zc->externalMatchCtx.mFinder == NULL); ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); @@ -3153,12 +3155,12 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { * This is just a heuristic based on the compressibility. * It may return both false positives and false negatives. */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_externalMatchCtx const* externalMatchCtx) +static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* appliedParams) { size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - if (externalMatchCtx->mFinder != NULL) { + if (appliedParams->useExternalMatchfinder) { /* We shouldn't make any assumptions about how an external matchfinder * will compress an RLE block. */ return 1; @@ -3980,7 +3982,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore, &zc->externalMatchCtx) && + ZSTD_maybeRLE(&zc->seqStore, &zc->appliedParams) && ZSTD_isRLE((BYTE const*)src, srcSize)) { return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); @@ -6333,7 +6335,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); if (!cctx->isFirstBlock && - ZSTD_maybeRLE(&cctx->seqStore, &cctx->externalMatchCtx) && + ZSTD_maybeRLE(&cctx->seqStore, &cctx->appliedParams) && ZSTD_isRLE((BYTE const*)src, srcSize)) { /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." @@ -6614,6 +6616,8 @@ void ZSTD_refExternalMatchFinder( size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); ZSTD_Sequence* const seqBuffer = malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); + zc->requestedParams.useExternalMatchfinder = 1; + ZSTD_externalMatchCtx emctx = { mState, mFinder, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index fc266996a54..1b217bfbbbe 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -353,8 +353,13 @@ struct ZSTD_CCtx_params_s { ZSTD_paramSwitch_e prefetchCDictTables; /* Controls whether zstd will fall back to an internal matchfinder - * when the external matchfinder returns an error code. */ + * if the external matchfinder returns an error code. */ int enableMatchfinderFallback; + + /* Indicates whether an external matchfinder has been referenced. + * Users can't set this externally. + * It is set internally in ZSTD_refExternalMatchfinder(). */ + int useExternalMatchfinder; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) From 39a467f0620b47e899cff97dc153e9f7d033aa8b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 30 Nov 2022 17:42:28 -0500 Subject: [PATCH 18/34] Eliminate malloc, move to cwksp allocation --- lib/compress/zstd_compress.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e1813c087f9..80f35df2233 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -28,9 +28,6 @@ #include "zstd_ldm.h" #include "zstd_compress_superblock.h" #include "../common/bits.h" /* ZSTD_highbit32 */ -#define ZSTD_DEPS_NEED_MALLOC -#include "../common/zstd_deps.h" /* ZSTD_malloc */ -#include /* *************************************************************** * Tuning parameters @@ -1506,7 +1503,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_paramSwitch_e useRowMatchFinder, const size_t buffInSize, const size_t buffOutSize, - const U64 pledgedSrcSize) + const U64 pledgedSrcSize, + int useExternalMatchfinder) { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); @@ -1530,6 +1528,9 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + size_t const externalSeqSpace = useExternalMatchfinder ? + ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence) : 0; + size_t const neededSpace = cctxSpace + entropySpace + @@ -1538,7 +1539,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( ldmSeqSpace + matchStateSize + tokenSpace + - bufferSpace; + bufferSpace + + externalSeqSpace; DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); return neededSpace; @@ -1556,7 +1558,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. */ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchfinder); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) @@ -1617,7 +1619,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) return ZSTD_estimateCCtxSize_usingCCtxParams_internal( &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, - ZSTD_CONTENTSIZE_UNKNOWN); + ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchfinder); } } @@ -1918,7 +1920,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, - buffInSize, buffOutSize, pledgedSrcSize); + buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchfinder); int resizeWorkspace; FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); @@ -2031,6 +2033,14 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->ldmState.loadedDictEnd = 0; } + /* reserve space for block-level external sequences */ + if (params->useExternalMatchfinder) { + size_t const maxNbExternalSeq = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq; + zc->externalMatchCtx.seqBuffer = + (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); + } + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); @@ -6613,18 +6623,14 @@ void ZSTD_refExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder ) { - size_t const seqBufferCapacity = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); - ZSTD_Sequence* const seqBuffer = malloc(seqBufferCapacity * sizeof(ZSTD_Sequence)); - zc->requestedParams.useExternalMatchfinder = 1; - ZSTD_externalMatchCtx emctx = { mState, mFinder, - /* @nocommit seqBuffer should move into the cwksp */ - seqBuffer, - seqBufferCapacity + /* seqBuffer is allocated later (from the cwskp) */ + NULL, /* seqBuffer */ + 0 /* seqBufferCapacity */ }; zc->externalMatchCtx = emctx; } From 9f8aedf658361f5288f108a07a6b64aee7de3bcd Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 30 Nov 2022 18:12:08 -0500 Subject: [PATCH 19/34] Handle CCtx reset properly --- lib/compress/zstd_compress.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 80f35df2233..63cc5b23809 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -155,6 +155,13 @@ static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) cctx->cdict = NULL; } +/* Clears the external matchfinder, if one has been referenced. + * Does not free any resources owned by the external match state! */ +static void ZSTD_clearExternalMatchCtx(ZSTD_CCtx* cctx) { + ZSTD_externalMatchCtx nullCtx = {0}; + cctx->externalMatchCtx = nullCtx; +} + static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) { size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; @@ -1254,6 +1261,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, "Can't reset parameters only when not in init stage."); ZSTD_clearAllDicts(cctx); + ZSTD_clearExternalMatchCtx(cctx); return ZSTD_CCtxParams_reset(&cctx->requestedParams); } return 0; From 59cc838de05dc562de629f81f10f57be07fb983b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 30 Nov 2022 18:23:45 -0500 Subject: [PATCH 20/34] Ensure seqStore has enough space for external sequences --- lib/compress/zstd_compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 63cc5b23809..a93e72fe273 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1516,7 +1516,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (cParams->minMatch==3) ? 3 : 4; + U32 const divider = (cParams->minMatch==3 || useExternalMatchfinder) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) @@ -1910,7 +1910,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; + U32 const divider = (params->cParams.minMatch==3 || params->useExternalMatchfinder) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 From fd9c46706ed2e83260f33b40499c3a3b8b2a4b7e Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 13:42:31 -0500 Subject: [PATCH 21/34] fix capitalization --- contrib/externalMatchfinder/main.c | 2 +- lib/compress/zstd_compress.c | 48 +++++++++++++-------------- lib/compress/zstd_compress_internal.h | 6 ++-- lib/zstd.h | 4 +-- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 2b216485694..60966a9eb4d 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -26,7 +26,7 @@ int main(int argc, char *argv[]) { simpleExternalMatchFinder ); - res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchfinderFallback, 1); + res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1); if (ZSTD_isError(res)) { printf("ERROR: %s\n", ZSTD_getErrorName(res)); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a93e72fe273..de3046b91f8 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -588,7 +588,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; - case ZSTD_c_enableMatchfinderFallback: + case ZSTD_c_enableMatchFinderFallback: bounds.lowerBound = 0; bounds.upperBound = 1; return bounds; @@ -658,7 +658,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_enableMatchfinderFallback: + case ZSTD_c_enableMatchFinderFallback: default: return 0; } @@ -715,7 +715,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_enableMatchfinderFallback: + case ZSTD_c_enableMatchFinderFallback: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -947,10 +947,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; return CCtxParams->prefetchCDictTables; - case ZSTD_c_enableMatchfinderFallback: - BOUNDCHECK(ZSTD_c_enableMatchfinderFallback, value); - CCtxParams->enableMatchfinderFallback = value; - return CCtxParams->enableMatchfinderFallback; + case ZSTD_c_enableMatchFinderFallback: + BOUNDCHECK(ZSTD_c_enableMatchFinderFallback, value); + CCtxParams->enableMatchFinderFallback = value; + return CCtxParams->enableMatchFinderFallback; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } @@ -1087,8 +1087,8 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_prefetchCDictTables: *value = (int)CCtxParams->prefetchCDictTables; break; - case ZSTD_c_enableMatchfinderFallback: - *value = CCtxParams->enableMatchfinderFallback; + case ZSTD_c_enableMatchFinderFallback: + *value = CCtxParams->enableMatchFinderFallback; break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } @@ -1512,11 +1512,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const size_t buffInSize, const size_t buffOutSize, const U64 pledgedSrcSize, - int useExternalMatchfinder) + int useExternalMatchFinder) { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (cParams->minMatch==3 || useExternalMatchfinder) ? 3 : 4; + U32 const divider = (cParams->minMatch==3 || useExternalMatchFinder) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) @@ -1536,7 +1536,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const externalSeqSpace = useExternalMatchfinder ? + size_t const externalSeqSpace = useExternalMatchFinder ? ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence) : 0; size_t const neededSpace = @@ -1566,7 +1566,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. */ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchfinder); + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) @@ -1627,7 +1627,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) return ZSTD_estimateCCtxSize_usingCCtxParams_internal( &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, - ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchfinder); + ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder); } } @@ -1910,7 +1910,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params->cParams.minMatch==3 || params->useExternalMatchfinder) ? 3 : 4; + U32 const divider = (params->cParams.minMatch==3 || params->useExternalMatchFinder) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 @@ -1928,7 +1928,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, - buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchfinder); + buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder); int resizeWorkspace; FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); @@ -2042,7 +2042,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, } /* reserve space for block-level external sequences */ - if (params->useExternalMatchfinder) { + if (params->useExternalMatchFinder) { size_t const maxNbExternalSeq = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq; zc->externalMatchCtx.seqBuffer = @@ -2945,14 +2945,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) * We need to revisit soon and implement it. */ if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder, + zc->appliedParams.useExternalMatchFinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Block splitting with external matchfinder enabled is not currently supported. " "Note: block splitting is enabled by default at high compression levels." ); } else { RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder, + zc->appliedParams.useExternalMatchFinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -2972,7 +2972,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* External matchfinder + LDM is technically possible, just not implemented yet. * We need to revisit soon and implement it. */ RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchfinder, + zc->appliedParams.useExternalMatchFinder, parameter_unsupported, // @nocommit Make this a parameter *combination* error? "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -2991,7 +2991,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (zc->appliedParams.useExternalMatchfinder) { + } else if (zc->appliedParams.useExternalMatchFinder) { assert( zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) ); @@ -3013,7 +3013,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ms->ldmSeqStore = NULL; lastLLSize = 0; } else { - if (zc->appliedParams.enableMatchfinderFallback) { + if (zc->appliedParams.enableMatchFinderFallback) { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); @@ -3178,7 +3178,7 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore, ZSTD_CCtx_params const* app size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - if (appliedParams->useExternalMatchfinder) { + if (appliedParams->useExternalMatchFinder) { /* We shouldn't make any assumptions about how an external matchfinder * will compress an RLE block. */ return 1; @@ -6631,7 +6631,7 @@ void ZSTD_refExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder ) { - zc->requestedParams.useExternalMatchfinder = 1; + zc->requestedParams.useExternalMatchFinder = 1; ZSTD_externalMatchCtx emctx = { mState, mFinder, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 1b217bfbbbe..19bf9c363b7 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -354,12 +354,12 @@ struct ZSTD_CCtx_params_s { /* Controls whether zstd will fall back to an internal matchfinder * if the external matchfinder returns an error code. */ - int enableMatchfinderFallback; + int enableMatchFinderFallback; /* Indicates whether an external matchfinder has been referenced. * Users can't set this externally. - * It is set internally in ZSTD_refExternalMatchfinder(). */ - int useExternalMatchfinder; + * It is set internally in ZSTD_refExternalMatchFinder(). */ + int useExternalMatchFinder; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) diff --git a/lib/zstd.h b/lib/zstd.h index 1a275bbd6a2..789a392ead6 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -446,7 +446,7 @@ typedef enum { * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables - * ZSTD_c_enableMatchfinderFallback + * ZSTD_c_enableMatchFinderFallback * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -2037,7 +2037,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 // @nocommit document -#define ZSTD_c_enableMatchfinderFallback ZSTD_c_experimentalParam17 +#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17 /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, From c79ac4301c29d74143c91f765bc41b894191493e Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 14:03:19 -0500 Subject: [PATCH 22/34] Add DEBUGLOG statements --- lib/compress/zstd_compress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index de3046b91f8..6dac8a1f6af 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3012,12 +3012,18 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ); ms->ldmSeqStore = NULL; lastLLSize = 0; + DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); } else { if (zc->appliedParams.enableMatchFinderFallback) { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); ms->ldmSeqStore = NULL; + DEBUGLOG( + 5, + "External matchfinder returned error code %lu. Falling back to internal matchfinder.", + (unsigned long)nbExternalSeqs + ); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } else { RETURN_ERROR( From 14d18ae4303423dd86043910fd628fcf6f863afe Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 14:18:09 -0500 Subject: [PATCH 23/34] Add compressionLevel param to matchfinder API --- contrib/externalMatchfinder/matchfinder.c | 7 +++++-- contrib/externalMatchfinder/matchfinder.h | 4 +++- lib/compress/zstd_compress.c | 3 ++- lib/zstd.h | 3 ++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 5962e9635d8..35914fb69bb 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -9,12 +9,15 @@ static U32 const BADIDX = (1 << 31); size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, const void* dict, size_t dictSize + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel ) { (void)externalMatchState; (void)dict; (void)dictSize; - (void)outSeqsCapacity; // @nocommit return an error + (void)outSeqsCapacity; + (void)compressionLevel; const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 3fc62f893ab..1d52531fc11 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -6,7 +6,9 @@ size_t simpleExternalMatchFinder( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize, const void* dict, size_t dictSize + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel ); #endif diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6dac8a1f6af..b0c018e0111 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3002,7 +3002,8 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->externalMatchCtx.seqBuffer, zc->externalMatchCtx.seqBufferCapacity, src, srcSize, - NULL, 0 /* dict and dictSize, currently not supported */ + NULL, 0, /* dict and dictSize, currently not supported */ + zc->appliedParams.compressionLevel ); if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { diff --git a/lib/zstd.h b/lib/zstd.h index 789a392ead6..04549bbfba8 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1487,7 +1487,8 @@ typedef size_t ZSTD_externalMatchFinder_F ( ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, // outSeqsCapacity >= blockSize / MINMATCH const void* src, size_t srcSize, - const void* dict, size_t dictSize + const void* dict, size_t dictSize, + int compressionLevel // srcSize <= 128 KB // dictSize is not bounded ); From a84ec5875e287ce4d96b8876631c24bb25ed2d49 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 14:40:56 -0500 Subject: [PATCH 24/34] fix c99 issues and add a param combination error code --- lib/common/error_private.c | 1 + lib/compress/zstd_compress.c | 76 +++++++++++++-------------- lib/compress/zstd_compress_internal.h | 3 +- lib/zstd.h | 12 ++--- lib/zstd_errors.h | 1 + 5 files changed, 47 insertions(+), 46 deletions(-) diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 42f7e070191..e94646a1224 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -30,6 +30,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(corruption_detected): return "Data corruption detected"; case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters"; case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; case PREFIX(init_missing): return "Context should be init first"; case PREFIX(memory_allocation): return "Allocation error : not enough memory"; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b0c018e0111..83f315abc41 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2946,14 +2946,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) if (zc->appliedParams.useBlockSplitter == ZSTD_ps_enable) { RETURN_ERROR_IF( zc->appliedParams.useExternalMatchFinder, - parameter_unsupported, // @nocommit Make this a parameter *combination* error? + parameter_combination_unsupported, "Block splitting with external matchfinder enabled is not currently supported. " "Note: block splitting is enabled by default at high compression levels." ); } else { RETURN_ERROR_IF( zc->appliedParams.useExternalMatchFinder, - parameter_unsupported, // @nocommit Make this a parameter *combination* error? + parameter_combination_unsupported, "Long-distance matching with external matchfinder enabled is not currently supported." ); } @@ -2973,7 +2973,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) * We need to revisit soon and implement it. */ RETURN_ERROR_IF( zc->appliedParams.useExternalMatchFinder, - parameter_unsupported, // @nocommit Make this a parameter *combination* error? + parameter_combination_unsupported, "Long-distance matching with external matchfinder enabled is not currently supported." ); @@ -2997,47 +2997,47 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ); assert(zc->externalMatchCtx.mFinder != NULL); - size_t nbExternalSeqs = (zc->externalMatchCtx.mFinder)( - zc->externalMatchCtx.mState, - zc->externalMatchCtx.seqBuffer, - zc->externalMatchCtx.seqBufferCapacity, - src, srcSize, - NULL, 0, /* dict and dictSize, currently not supported */ - zc->appliedParams.compressionLevel - ); - - if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { - ZSTD_sequencePosition seqPos = {0,0,0}; - ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbExternalSeqs, src, srcSize + { size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)( + zc->externalMatchCtx.mState, + zc->externalMatchCtx.seqBuffer, + zc->externalMatchCtx.seqBufferCapacity, + src, srcSize, + NULL, 0, /* dict and dictSize, currently not supported */ + zc->appliedParams.compressionLevel ); - ms->ldmSeqStore = NULL; - lastLLSize = 0; - DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); - } else { - if (zc->appliedParams.enableMatchFinderFallback) { - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, - zc->appliedParams.useRowMatchFinder, - dictMode); - ms->ldmSeqStore = NULL; - DEBUGLOG( - 5, - "External matchfinder returned error code %lu. Falling back to internal matchfinder.", - (unsigned long)nbExternalSeqs + + if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { + ZSTD_sequencePosition seqPos = {0,0,0}; + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbExternalSeqs, src, srcSize ); - lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + ms->ldmSeqStore = NULL; + lastLLSize = 0; + DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); } else { - RETURN_ERROR( - externalMatchFinder_failed, - "External matchfinder returned an error code!" - ); - } - } + if (zc->appliedParams.enableMatchFinderFallback) { + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + DEBUGLOG( + 5, + "External matchfinder returned error code %lu. Falling back to internal matchfinder.", + (unsigned long)nbExternalSeqs + ); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } else { + RETURN_ERROR( + externalMatchFinder_failed, + "External matchfinder returned an error code!" + ); + } + } } } else { /* not long range mode and no external matchfinder */ - assert(zc->externalMatchCtx.mFinder == NULL); ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); + assert(zc->externalMatchCtx.mFinder == NULL); ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } @@ -6638,7 +6638,6 @@ void ZSTD_refExternalMatchFinder( ZSTD_CCtx* zc, void* mState, ZSTD_externalMatchFinder_F* mFinder ) { - zc->requestedParams.useExternalMatchFinder = 1; ZSTD_externalMatchCtx emctx = { mState, mFinder, @@ -6648,4 +6647,5 @@ void ZSTD_refExternalMatchFinder( 0 /* seqBufferCapacity */ }; zc->externalMatchCtx = emctx; + zc->requestedParams.useExternalMatchFinder = 1; } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 19bf9c363b7..0cb58352ca1 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -391,8 +391,7 @@ typedef struct { ZSTD_entropyCTablesMetadata_t entropyMetadata; } ZSTD_blockSplitCtx; -/* Block-level sequence compression API */ -// @nocommit improve docs +/* Context for block-level external matchfinder API */ typedef struct { void* mState; ZSTD_externalMatchFinder_F* mFinder; diff --git a/lib/zstd.h b/lib/zstd.h index 04549bbfba8..d51c7c48f14 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1477,20 +1477,20 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const void* src, size_t srcSize); /* Block-level sequence compression API */ -// @nocommit improve docs +/* @nocommit improve docs */ #define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) -// @nocommit move bounds comments into docs +/* @nocommit move bounds comments into docs: + * outSeqsCapacity >= blockSize / MINMATCH + * srcSize <= 128 KB + * dictSize is not bounded */ typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - // outSeqsCapacity >= blockSize / MINMATCH const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel - // srcSize <= 128 KB - // dictSize is not bounded ); ZSTDLIB_STATIC_API void @@ -2037,7 +2037,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 -// @nocommit document +/* @nocommit document */ #define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17 /*! ZSTD_CCtx_getParameter() : diff --git a/lib/zstd_errors.h b/lib/zstd_errors.h index 723b3c42def..21f8afb286f 100644 --- a/lib/zstd_errors.h +++ b/lib/zstd_errors.h @@ -62,6 +62,7 @@ typedef enum { ZSTD_error_dictionary_wrong = 32, ZSTD_error_dictionaryCreation_failed = 34, ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_combination_unsupported = 41, ZSTD_error_parameter_outOfBound = 42, ZSTD_error_tableLog_tooLarge = 44, ZSTD_error_maxSymbolValue_tooLarge = 46, From 807314cffd216f9fb711f4125f7811496c8f33c4 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 1 Dec 2022 15:21:52 -0500 Subject: [PATCH 25/34] nits --- lib/compress/zstd_compress.c | 10 +--------- lib/zstd.h | 1 - 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 83f315abc41..f6d3fced6b7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -155,13 +155,6 @@ static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) cctx->cdict = NULL; } -/* Clears the external matchfinder, if one has been referenced. - * Does not free any resources owned by the external match state! */ -static void ZSTD_clearExternalMatchCtx(ZSTD_CCtx* cctx) { - ZSTD_externalMatchCtx nullCtx = {0}; - cctx->externalMatchCtx = nullCtx; -} - static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) { size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; @@ -1261,7 +1254,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, "Can't reset parameters only when not in init stage."); ZSTD_clearAllDicts(cctx); - ZSTD_clearExternalMatchCtx(cctx); + ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx)); return ZSTD_CCtxParams_reset(&cctx->requestedParams); } return 0; @@ -2901,7 +2894,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(srcSize <= ZSTD_BLOCKSIZE_MAX); /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding * additional 1. We need to revisit and change this logic to be more consistent */ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { diff --git a/lib/zstd.h b/lib/zstd.h index d51c7c48f14..71edd803c52 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -2670,7 +2670,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ - #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ #if defined (__cplusplus) From c58a56d94bc020baf84f301296f6eb3fd81684d2 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Sun, 4 Dec 2022 19:24:50 -0500 Subject: [PATCH 26/34] Test external matchfinder API --- contrib/externalMatchfinder/matchfinder.c | 5 +- lib/compress/zstd_compress.c | 59 ++++++++++++-- tests/Makefile | 2 +- tests/external_matchfinder.c | 99 +++++++++++++++++++++++ tests/external_matchfinder.h | 23 ++++++ tests/zstreamtest.c | 84 ++++++++++++++++++- 6 files changed, 261 insertions(+), 11 deletions(-) create mode 100644 tests/external_matchfinder.c create mode 100644 tests/external_matchfinder.h diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 35914fb69bb..bc765c8bd69 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -1,6 +1,5 @@ #include "zstd_compress_internal.h" #include "matchfinder.h" -#include #define HSIZE 1024 static U32 const HLOG = 10; @@ -58,7 +57,7 @@ size_t simpleExternalMatchFinder( ZSTD_Sequence const finalSeq = { 0, iend - anchor, 0, 0 }; - outSeqs[seqCount] = finalSeq; + outSeqs[seqCount++] = finalSeq; - return seqCount; + return seqCount; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f6d3fced6b7..9bd861464f2 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2885,6 +2885,49 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthType = ZSTD_llt_none; } +/* ZSTD_postProcessExternalMatchFinderResult() : + * Validates and post-processes sequences obtained through the external matchfinder API. + * Returns the number of sequences after post-processing, or an error code. */ +static size_t ZSTD_postProcessExternalMatchFinderResult( + ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, int emptySrc +) { + RETURN_ERROR_IF( + nbExternalSeqs > outSeqsCapacity, + externalMatchFinder_failed, + "External matchfinder returned error code %lu", + (unsigned long)nbExternalSeqs + ); + + RETURN_ERROR_IF( + nbExternalSeqs == 0 && !emptySrc, + externalMatchFinder_failed, + "External matchfinder produced zero sequences for a non-empty src buffer!" + ); + + if (emptySrc) { + ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); + return 1; + } else { + ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; + + /* Check if lastSeq is a block delimiter, append one if not */ + if (lastSeq.offset == 0 && lastSeq.matchLength == 0) { + return nbExternalSeqs; + } else { + /* This error condition is only possible if the external matchfinder + * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ + RETURN_ERROR_IF( + nbExternalSeqs == outSeqsCapacity, + externalMatchFinder_failed, + "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" + ); + + ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); + return nbExternalSeqs + 1; + } + } +} + typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -2998,10 +3041,17 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.compressionLevel ); - if (nbExternalSeqs <= zc->externalMatchCtx.seqBufferCapacity) { + size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult( + zc->externalMatchCtx.seqBuffer, + nbExternalSeqs, + zc->externalMatchCtx.seqBufferCapacity, + srcSize == 0 /* emptySrc */ + ); + + if (!ZSTD_isError(nbPostProcessedSeqs)) { ZSTD_sequencePosition seqPos = {0,0,0}; ZSTD_copySequencesToSeqStoreExplicitBlockDelim( - zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbExternalSeqs, src, srcSize + zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize ); ms->ldmSeqStore = NULL; lastLLSize = 0; @@ -3019,10 +3069,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } else { - RETURN_ERROR( - externalMatchFinder_failed, - "External matchfinder returned an error code!" - ); + return nbPostProcessedSeqs; /* return an error */ } } } } else { /* not long range mode and no external matchfinder */ diff --git a/tests/Makefile b/tests/Makefile index afea6475afb..44e0f6a7043 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -169,7 +169,7 @@ fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PR $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) CLEAN += zstreamtest zstreamtest32 -ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c +ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c external_matchfinder.c ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES) ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES) zstreamtest32 : CFLAGS += -m32 diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c new file mode 100644 index 00000000000..8938c8db3f9 --- /dev/null +++ b/tests/external_matchfinder.c @@ -0,0 +1,99 @@ +#include "external_matchfinder.h" +#include +#include "zstd_compress_internal.h" +#include + +#define HSIZE 1024 +static U32 const HLOG = 10; +static U32 const MLS = 4; +static U32 const BADIDX = (1 << 31); + +static size_t simpleExternalMatchFinder( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel +) { + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + const BYTE* anchor = istart; + size_t seqCount = 0; + U32 hashTable[HSIZE]; + + (void)externalMatchState; + (void)dict; + (void)dictSize; + (void)outSeqsCapacity; + (void)compressionLevel; + + for (int i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } + + while (ip + 4 < iend) { + size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); + U32 const matchIndex = hashTable[hash]; + hashTable[hash] = ip - istart; + + if (matchIndex != BADIDX) { + const BYTE* const match = istart + matchIndex; + size_t const matchLen = ZSTD_count(ip, match, iend); + if (matchLen >= ZSTD_MINMATCH_MIN) { + U32 const litLen = ip - anchor; + U32 const offset = ip - match; + ZSTD_Sequence const seq = { + offset, litLen, matchLen, 0 + }; + outSeqs[seqCount++] = seq; + ip += matchLen; + anchor = ip; + continue; + } + } + + ip++; + } + + { ZSTD_Sequence const finalSeq = { + 0, iend - anchor, 0, 0 + }; + outSeqs[seqCount++] = finalSeq; + } + + return seqCount; +} + +size_t zstreamExternalMatchFinder( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel +) { + EMF_testCase testCase = *((EMF_testCase*)externalMatchState); + memset(outSeqs, 0, outSeqsCapacity); + + switch (testCase) { + case EMF_ZERO_SEQS: + return 0; + case EMF_ONE_BIG_SEQ: + outSeqs[0].offset = 0; + outSeqs[0].matchLength = 0; + outSeqs[0].litLength = srcSize; + return 1; + case EMF_LOTS_OF_SEQS: + return simpleExternalMatchFinder( + externalMatchState, + outSeqs, outSeqsCapacity, + src, srcSize, + dict, dictSize, + compressionLevel + ); + case EMF_SMALL_ERROR: + return outSeqsCapacity + 1; + case EMF_BIG_ERROR: + default: + return ZSTD_EXTERNAL_MATCHFINDER_ERROR; + } +} diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h new file mode 100644 index 00000000000..279aa7ab53a --- /dev/null +++ b/tests/external_matchfinder.h @@ -0,0 +1,23 @@ +#ifndef EXTERNAL_MATCHFINDER +#define EXTERNAL_MATCHFINDER + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +/* See external_matchfinder.c for details on each test case */ +typedef enum { + EMF_ZERO_SEQS = 0, + EMF_ONE_BIG_SEQ = 1, + EMF_LOTS_OF_SEQS = 2, + EMF_BIG_ERROR = 3, + EMF_SMALL_ERROR = 4 +} EMF_testCase; + +size_t zstreamExternalMatchFinder( + void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel +); + +#endif // EXTERNAL_MATCHFINDER diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index ce9020f128c..204b82a760f 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -39,7 +39,7 @@ #include "seqgen.h" #include "util.h" #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ - +#include "external_matchfinder.h" /* zstreamExternalMatchFinder */ /*-************************************ * Constants @@ -1777,6 +1777,88 @@ static int basicUnitTests(U32 seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++); + { + size_t const dstBufSize = ZSTD_compressBound(CNBufferSize); + BYTE* dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize)); + size_t const checkBufSize = CNBufferSize; + BYTE* checkBuf = (BYTE*)malloc(checkBufSize); + int enableFallback; + size_t res; + EMF_testCase externalMatchState; + + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + + /* Reference external matchfinder outside the test loop to + * check that the reference is preserved across compressions */ + ZSTD_refExternalMatchFinder( + zc, + &externalMatchState, + zstreamExternalMatchFinder + ); + + for (enableFallback = 0; enableFallback < 1; enableFallback++) { + size_t testCaseId; + + EMF_testCase const EMF_successCases[] = { + EMF_ONE_BIG_SEQ, + EMF_LOTS_OF_SEQS, + }; + size_t const EMF_numSuccessCases = 2; + + EMF_testCase const EMF_failureCases[] = { + EMF_ZERO_SEQS, + EMF_BIG_ERROR, + EMF_SMALL_ERROR, + }; + size_t const EMF_numFailureCases = 3; + + /* Test external matchfinder success scenarios */ + for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) { + externalMatchState = EMF_successCases[testCaseId]; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); + CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); + CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res)); + CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!"); + } + + /* Test external matchfinder failure scenarios */ + for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) { + externalMatchState = EMF_failureCases[testCaseId]; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); + if (enableFallback) { + CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res)); + CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!"); + } else { + CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!"); + CHECK( + ZSTD_getErrorCode(res) != ZSTD_error_externalMatchFinder_failed, + "EMF: Wrong error code: %s", ZSTD_getErrorName(res) + ); + } + } + + /* Test compression with external matchfinder + empty src buffer */ + externalMatchState = EMF_ZERO_SEQS; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0); + CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); + CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!"); + } + + /* Test that reset clears the external matchfinder */ + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); + CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); + } + DISPLAYLEVEL(3, "OK \n"); + _end: FUZ_freeDictionary(dictionary); ZSTD_freeCStream(zc); From 78a9e4a45c277f9a88d63deadb7c8d9943054727 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 6 Dec 2022 02:41:27 -0500 Subject: [PATCH 27/34] C90 compat for simpleExternalMatchFinder --- contrib/externalMatchfinder/matchfinder.c | 34 ++++++++++++----------- tests/external_matchfinder.c | 10 ++++--- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index bc765c8bd69..55f686b0a8f 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -7,28 +7,29 @@ static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); size_t simpleExternalMatchFinder( - void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel ) { - (void)externalMatchState; - (void)dict; - (void)dictSize; - (void)outSeqsCapacity; - (void)compressionLevel; - const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; const BYTE* anchor = istart; - size_t seqCount = 0; - U32 hashTable[HSIZE]; - for (int i=0; i < HSIZE; i++) { - hashTable[i] = BADIDX; - } + + (void)externalMatchState; + (void)dict; + (void)dictSize; + (void)outSeqsCapacity; + (void)compressionLevel; + + { int i; + for (i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } } while (ip + 4 < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); @@ -54,10 +55,11 @@ size_t simpleExternalMatchFinder( ip++; } - ZSTD_Sequence const finalSeq = { - 0, iend - anchor, 0, 0 - }; - outSeqs[seqCount++] = finalSeq; + { ZSTD_Sequence const finalSeq = { + 0, iend - anchor, 0, 0 + }; + outSeqs[seqCount++] = finalSeq; + } return seqCount; } diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 8938c8db3f9..2c43a0127c2 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -9,7 +9,8 @@ static U32 const MLS = 4; static U32 const BADIDX = (1 << 31); static size_t simpleExternalMatchFinder( - void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel @@ -27,9 +28,10 @@ static size_t simpleExternalMatchFinder( (void)outSeqsCapacity; (void)compressionLevel; - for (int i=0; i < HSIZE; i++) { - hashTable[i] = BADIDX; - } + { int i; + for (i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } } while (ip + 4 < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); From 0de43b9a8a59166ff21d114c11e41fb476a733e0 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 6 Dec 2022 13:23:38 -0500 Subject: [PATCH 28/34] Fix some @nocommits and an ASAN bug --- contrib/externalMatchfinder/main.c | 2 +- lib/compress/zstd_compress.c | 6 ++++-- lib/zstd.h | 12 ++++++------ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 60966a9eb4d..7d84123ddd8 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -17,7 +17,7 @@ int main(int argc, char *argv[]) { ZSTD_CCtx* zc = ZSTD_createCCtx(); - int simpleExternalMatchState = 0xdeadbeef; // @nocommit + int simpleExternalMatchState = 0xdeadbeef; // Here is the crucial bit of code! ZSTD_refExternalMatchFinder( diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9bd861464f2..51c8c20fbee 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1529,8 +1529,10 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const externalSeqSpace = useExternalMatchFinder ? - ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX) * sizeof(ZSTD_Sequence) : 0; + size_t const maxNbExternalSeq = ZSTD_sequenceBound(ZSTD_BLOCKSIZE_MAX); + size_t const externalSeqSpace = useExternalMatchFinder + ? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) + : 0; size_t const neededSpace = cctxSpace + diff --git a/lib/zstd.h b/lib/zstd.h index 71edd803c52..bed40cbc35b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -530,7 +530,7 @@ typedef enum { * They will be used to compress next frame. * Resetting session never fails. * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. @nocommit mention external matchfinder + * This also removes any reference to any dictionary or external matchfinder. * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) * - Both : similar to resetting the session, followed by resetting parameters. @@ -1477,14 +1477,14 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize, const void* src, size_t srcSize); /* Block-level sequence compression API */ -/* @nocommit improve docs */ +/* @nocommit document */ #define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) -/* @nocommit move bounds comments into docs: - * outSeqsCapacity >= blockSize / MINMATCH - * srcSize <= 128 KB - * dictSize is not bounded */ +/* @nocommit document these constraints: + * - outSeqsCapacity >= (blockSize / MINMATCH) + 1 + * - srcSize <= 128 KB + * - dictSize is not bounded */ typedef size_t ZSTD_externalMatchFinder_F ( void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, From 99215cf1690dc4fefc48360c1ff90194f9b44661 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 11:13:02 -0500 Subject: [PATCH 29/34] nit --- contrib/externalMatchfinder/matchfinder.c | 10 +++++----- tests/external_matchfinder.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c index 55f686b0a8f..7fabc1a4a1b 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalMatchfinder/matchfinder.c @@ -34,14 +34,14 @@ size_t simpleExternalMatchFinder( while (ip + 4 < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); U32 const matchIndex = hashTable[hash]; - hashTable[hash] = ip - istart; + hashTable[hash] = (U32)(ip - istart); if (matchIndex != BADIDX) { const BYTE* const match = istart + matchIndex; - size_t const matchLen = ZSTD_count(ip, match, iend); + U32 const matchLen = (U32)ZSTD_count(ip, match, iend); if (matchLen >= ZSTD_MINMATCH_MIN) { - U32 const litLen = ip - anchor; - U32 const offset = ip - match; + U32 const litLen = (U32)(ip - anchor); + U32 const offset = (U32)(ip - match); ZSTD_Sequence const seq = { offset, litLen, matchLen, 0 }; @@ -56,7 +56,7 @@ size_t simpleExternalMatchFinder( } { ZSTD_Sequence const finalSeq = { - 0, iend - anchor, 0, 0 + 0, (U32)(iend - anchor), 0, 0 }; outSeqs[seqCount++] = finalSeq; } diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 2c43a0127c2..fc504ee8471 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -36,14 +36,14 @@ static size_t simpleExternalMatchFinder( while (ip + 4 < iend) { size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); U32 const matchIndex = hashTable[hash]; - hashTable[hash] = ip - istart; + hashTable[hash] = (U32)(ip - istart); if (matchIndex != BADIDX) { const BYTE* const match = istart + matchIndex; - size_t const matchLen = ZSTD_count(ip, match, iend); + U32 const matchLen = (U32)ZSTD_count(ip, match, iend); if (matchLen >= ZSTD_MINMATCH_MIN) { - U32 const litLen = ip - anchor; - U32 const offset = ip - match; + U32 const litLen = (U32)(ip - anchor); + U32 const offset = (U32)(ip - match); ZSTD_Sequence const seq = { offset, litLen, matchLen, 0 }; @@ -58,7 +58,7 @@ static size_t simpleExternalMatchFinder( } { ZSTD_Sequence const finalSeq = { - 0, iend - anchor, 0, 0 + 0, (U32)(iend - anchor), 0, 0 }; outSeqs[seqCount++] = finalSeq; } @@ -82,7 +82,7 @@ size_t zstreamExternalMatchFinder( case EMF_ONE_BIG_SEQ: outSeqs[0].offset = 0; outSeqs[0].matchLength = 0; - outSeqs[0].litLength = srcSize; + outSeqs[0].litLength = (U32)(srcSize); return 1; case EMF_LOTS_OF_SEQS: return simpleExternalMatchFinder( From 08d6c3908e079d340152f3e9dec6850aba46d4e0 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 11:29:15 -0500 Subject: [PATCH 30/34] nit --- tests/zstreamtest.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 204b82a760f..383219c2120 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1856,6 +1856,9 @@ static int basicUnitTests(U32 seed, double compressibility) externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); + + free(dstBuf); + free(checkBuf); } DISPLAYLEVEL(3, "OK \n"); From 321500f3462a2aa78bf404dbecfa51ef8c0a758f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 11:59:20 -0500 Subject: [PATCH 31/34] nits --- contrib/externalMatchfinder/main.c | 8 ++++---- contrib/externalMatchfinder/matchfinder.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c index 7d84123ddd8..003b4b55783 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalMatchfinder/main.c @@ -5,7 +5,7 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zstd_errors.h" -#include "matchfinder.h" // simpleExternalMatchFinder, simpleExternalMatchStateDestructor +#include "matchfinder.h" // simpleExternalMatchFinder int main(int argc, char *argv[]) { size_t res; @@ -35,17 +35,17 @@ int main(int argc, char *argv[]) { FILE *f = fopen(argv[1], "rb"); fseek(f, 0, SEEK_END); - long srcSize = ftell(f); + long const srcSize = ftell(f); fseek(f, 0, SEEK_SET); char *src = malloc(srcSize + 1); fread(src, srcSize, 1, f); fclose(f); - size_t dstSize = ZSTD_compressBound(srcSize); + size_t const dstSize = ZSTD_compressBound(srcSize); char *dst = malloc(dstSize); - size_t cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); + size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); if (ZSTD_isError(cSize)) { printf("ERROR: %s\n", ZSTD_getErrorName(cSize)); diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h index 1d52531fc11..b89a1a4a4e0 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalMatchfinder/matchfinder.h @@ -5,7 +5,8 @@ #include "zstd.h" size_t simpleExternalMatchFinder( - void* externalMatchState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel From 785c3dd00f2a96bc22ca756bf968f6a954a70012 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 12:01:20 -0500 Subject: [PATCH 32/34] forward declare copySequencesToSeqStore functions in zstd_compress_internal.h --- lib/compress/zstd_compress.c | 17 +------------- lib/compress/zstd_compress_internal.h | 32 ++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 51c8c20fbee..29819add542 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -6072,9 +6072,6 @@ static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 return offBase; } -/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of - * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. - */ size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, @@ -6129,19 +6126,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, return 0; } -/* Returns the number of bytes to move the current read position back by. - * Only non-zero if we ended up splitting a sequence. - * Otherwise, it may return a ZSTD error if something went wrong. - * - * This function will attempt to scan through blockSize bytes - * represented by the sequences in @inSeqs, - * storing any (partial) sequences. - * - * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to - * avoid splitting a match, or to avoid splitting a match such that it would produce a match - * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. - */ -static size_t +size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const void* src, size_t blockSize) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 0cb58352ca1..a5aba62cdba 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -155,12 +155,6 @@ typedef struct { size_t posInSrc; /* Number of bytes given by sequences provided so far */ } ZSTD_sequencePosition; -size_t -ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, - ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, - const void* src, size_t blockSize); - UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; typedef struct { @@ -1442,4 +1436,30 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); */ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. + */ +size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); + +/* Returns the number of bytes to move the current read position back by. + * Only non-zero if we ended up splitting a sequence. + * Otherwise, it may return a ZSTD error if something went wrong. + * + * This function will attempt to scan through blockSize bytes + * represented by the sequences in @inSeqs, + * storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. + */ +size_t +ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); + #endif /* ZSTD_COMPRESS_H */ From ef7fa30e208e16d4304c568af956d90e951791db Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 12:06:46 -0500 Subject: [PATCH 33/34] nit --- tests/external_matchfinder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index fc504ee8471..6655ed5c038 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -73,7 +73,7 @@ size_t zstreamExternalMatchFinder( const void* dict, size_t dictSize, int compressionLevel ) { - EMF_testCase testCase = *((EMF_testCase*)externalMatchState); + EMF_testCase const testCase = *((EMF_testCase*)externalMatchState); memset(outSeqs, 0, outSeqsCapacity); switch (testCase) { From 8d4f42d2abb278fc1f4ba4546c9b9022e1fd9da1 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 7 Dec 2022 12:11:14 -0500 Subject: [PATCH 34/34] nit --- tests/zstreamtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 383219c2120..3cf791f6ca2 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -39,7 +39,7 @@ #include "seqgen.h" #include "util.h" #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ -#include "external_matchfinder.h" /* zstreamExternalMatchFinder */ +#include "external_matchfinder.h" /* zstreamExternalMatchFinder, EMF_testCase */ /*-************************************ * Constants