From f5728da365e14a715a131434847f732ee84d8719 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:04:02 -0700 Subject: [PATCH 1/6] update targetCBlockSize documentation --- lib/compress/zstd_compress_superblock.c | 4 +-- lib/zstd.h | 38 ++++++++++++++----------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 8c466c47952..628a2dccd09 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -469,8 +469,6 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs, return n; } -#define CBLOCK_TARGET_SIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ - /** ZSTD_compressSubBlock_multi() : * Breaks super-block into multiple sub-blocks and compresses them. * Entropy will be written into the first block. @@ -504,7 +502,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, const BYTE* llCodePtr = seqStorePtr->llCode; const BYTE* mlCodePtr = seqStorePtr->mlCode; const BYTE* ofCodePtr = seqStorePtr->ofCode; - size_t const minTarget = CBLOCK_TARGET_SIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ + size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); int writeSeqEntropy = 1; diff --git a/lib/zstd.h b/lib/zstd.h index ba611656ff6..115d7f2acca 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -262,9 +262,9 @@ ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer * /*! ZSTD_compressCCtx() : * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . + * Important : in order to mirror `ZSTD_compress()` behavior, + * this function compresses at the requested compression level, + * __ignoring any other advanced parameter__ . * If any advanced parameter was set using the advanced API, * they will all be reset. Only `compressionLevel` remains. */ @@ -286,7 +286,7 @@ ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer * /*! ZSTD_decompressDCtx() : * Same as ZSTD_decompress(), * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. + * Compatible with sticky parameters (see below). */ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, @@ -302,12 +302,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, * using ZSTD_CCtx_set*() functions. * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ . * * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * * This API supersedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. + * In the future, we expect to remove API entry points from experimental which are redundant with this API. */ @@ -390,16 +390,19 @@ typedef enum { * The higher the value of selected strategy, the more complex it is, * resulting in stronger and slower compression. * Special: value 0 means "use default strategy". */ - ZSTD_c_targetCBlockSize=130, /* Tries to fit compressed block size to be - * around targetCBlockSize. No target when - * targetCBlockSize == 0. There is no guarantee - * on compressed block size (default:0). - * Since the decoder has to buffer a complete - * block to begin decoding it, in low band- - * width streaming environments this may - * improve end-to-end latency. Bound by - * ZSTD_TARGETCBLOCKSIZE_MIN and - * ZSTD_TARGETCBLOCKSIZE_MAX. */ + + ZSTD_c_targetCBlockSize=130, /* v1.5.6+ + * Attempts to fit compressed block size into approximatively targetCBlockSize. + * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX. + * Note that it's not a guarantee, just a convergence target (default:0). + * No target when targetCBlockSize == 0. + * This is helpful in low bandwidth streaming environments to improve end-to-end latency, + * when a client can make use of partial documents (a prominent example being Chrome). + * Note: this parameter is stable since v1.5.6. + * It was present as an experimental parameter in earlier versions, + * but we don't recomment using it with earlier library versions + * due to massive performance regressions. + */ /* LDM mode parameters */ ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. * This parameter is designed to improve compression ratio @@ -584,6 +587,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); /*! ZSTD_compress2() : * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * (note that this entry point doesn't even expose a compression level parameter). * ZSTD_compress2() always starts a new frame. * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() @@ -1250,7 +1254,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) /* Advanced parameter bounds */ -#define ZSTD_TARGETCBLOCKSIZE_MIN 64 +#define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX #define ZSTD_SRCSIZEHINT_MIN 0 #define ZSTD_SRCSIZEHINT_MAX INT_MAX From 5d82c2b57c0f5f239ba712a7e6ec46c84a6ba02d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:17:41 -0700 Subject: [PATCH 2/6] add a paragraph on UB DCtx state after error --- lib/zstd.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/zstd.h b/lib/zstd.h index 115d7f2acca..d27e593179b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -904,6 +904,12 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); * @return : 0 when a frame is completely decoded and fully flushed, * or an error code, which can be tested using ZSTD_isError(), * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. + * + * Note: when an operation returns with an error code, the @zds state if left in undefined state. + * It's UB to invoke `ZSTD_decompressStream()` on such a state. + * In order to re-use such a state, it must be reset first, + * which can be done explicitly (`ZSTD_DCtx_reset()`), + * or is sometimes implied (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) */ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); From 902c7ec1fe833f7f8d542fe94acba9e3a0a013a1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:30:35 -0700 Subject: [PATCH 3/6] add doc on CCtx UB state --- lib/zstd.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index d27e593179b..b13e7e95998 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -791,6 +791,11 @@ typedef enum { * only ZSTD_e_end or ZSTD_e_flush operations are allowed. * Before starting a new compression job, or changing compression parameters, * it is required to fully flush internal buffers. + * - note: if an operation ends with an error, it may leave @cctx in an undefined state. + * Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state. + * In order to be re-employed after an error, a state must be reset, + * which can be done explicitly (ZSTD_CCtx_reset()), + * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx()) */ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_outBuffer* output, @@ -905,11 +910,11 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); * or an error code, which can be tested using ZSTD_isError(), * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. * - * Note: when an operation returns with an error code, the @zds state if left in undefined state. + * Note: when an operation returns with an error code, the @zds state may be left in undefined state. * It's UB to invoke `ZSTD_decompressStream()` on such a state. - * In order to re-use such a state, it must be reset first, + * In order to re-use such a state, it must be first reset, * which can be done explicitly (`ZSTD_DCtx_reset()`), - * or is sometimes implied (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) + * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) */ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); From 3d18d9a9ce5fd5a03c6389b17ee464cf2cf60e94 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:30:54 -0700 Subject: [PATCH 4/6] updated API manual --- doc/zstd_manual.html | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 58fe958dc0b..48db40e6ba2 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -190,9 +190,9 @@

Compression context

  When compressing many times,
                    const void* src, size_t srcSize,
                          int compressionLevel);
 

Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - Important : in order to behave similarly to `ZSTD_compress()`, - this function compresses at requested compression level, - __ignoring any other parameter__ . + Important : in order to mirror `ZSTD_compress()` behavior, + this function compresses at the requested compression level, + __ignoring any other advanced parameter__ . If any advanced parameter was set using the advanced API, they will all be reset. Only `compressionLevel` remains. @@ -212,7 +212,7 @@

Decompression context

  When decompressing many times,
                      const void* src, size_t srcSize);
 

Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx. - Compatible with sticky parameters. + Compatible with sticky parameters (see below).


@@ -296,6 +296,19 @@

Decompression context

  When decompressing many times,
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
                               * Special: value 0 means "use default strategy". */
+
+    ZSTD_c_targetCBlockSize=130, /* v1.5.6+
+                                  * Attempts to fit compressed block size into approximatively targetCBlockSize.
+                                  * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
+                                  * Note that it's not a guarantee, just a convergence target (default:0).
+                                  * No target when targetCBlockSize == 0.
+                                  * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
+                                  * when a client can make use of partial documents (a prominent example being Chrome).
+                                  * Note: this parameter is stable since v1.5.6.
+                                  * It was present as an experimental parameter in earlier versions,
+                                  * but we don't recomment using it with earlier library versions
+                                  * due to massive performance regressions.
+                                  */
     /* LDM mode parameters */
     ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
                                      * This parameter is designed to improve compression ratio
@@ -375,7 +388,6 @@ 

Decompression context

  When decompressing many times,
      * ZSTD_c_forceMaxWindow
      * ZSTD_c_forceAttachDict
      * ZSTD_c_literalCompressionMode
-     * ZSTD_c_targetCBlockSize
      * ZSTD_c_srcSizeHint
      * ZSTD_c_enableDedicatedDictSearch
      * ZSTD_c_stableInBuffer
@@ -396,7 +408,7 @@ 

Decompression context

  When decompressing many times,
      ZSTD_c_experimentalParam3=1000,
      ZSTD_c_experimentalParam4=1001,
      ZSTD_c_experimentalParam5=1002,
-     ZSTD_c_experimentalParam6=1003,
+     /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
      ZSTD_c_experimentalParam7=1004,
      ZSTD_c_experimentalParam8=1005,
      ZSTD_c_experimentalParam9=1006,
@@ -483,6 +495,7 @@ 

Decompression context

  When decompressing many times,
                        void* dst, size_t dstCapacity,
                  const void* src, size_t srcSize);
 

Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + (note that this entry point doesn't even expose a compression level parameter). ZSTD_compress2() always starts a new frame. Should cctx hold data from a previously unfinished frame, everything about it is forgotten. - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() @@ -668,6 +681,11 @@

Streaming compression functions

typedef enum {
             only ZSTD_e_end or ZSTD_e_flush operations are allowed.
             Before starting a new compression job, or changing compression parameters,
             it is required to fully flush internal buffers.
+  - note: if an operation ends with an error, it may leave @cctx in an undefined state.
+          Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state.
+          In order to be re-employed after an error, a state must be reset,
+          which can be done explicitly (ZSTD_CCtx_reset()),
+          or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
  
 


@@ -753,6 +771,12 @@

Streaming decompression functions


@return : 0 when a frame is completely decoded and fully flushed, or an error code, which can be tested using ZSTD_isError(), or any other value > 0, which means there is some decoding or flushing to do to complete current frame. + + Note: when an operation returns with an error code, the @zds state may be left in undefined state. + It's UB to invoke `ZSTD_decompressStream()` on such a state. + In order to re-use such a state, it must be first reset, + which can be done explicitly (`ZSTD_DCtx_reset()`), + or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)


From c5da438dc0ca81ce697a73a02b060e3ba7550bab Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:33:22 -0700 Subject: [PATCH 5/6] fix typo --- doc/zstd_manual.html | 2 +- lib/zstd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 48db40e6ba2..bc4a2403648 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -306,7 +306,7 @@

Decompression context

  When decompressing many times,
                                   * when a client can make use of partial documents (a prominent example being Chrome).
                                   * Note: this parameter is stable since v1.5.6.
                                   * It was present as an experimental parameter in earlier versions,
-                                  * but we don't recomment using it with earlier library versions
+                                  * but it's not recommended using it with earlier library versions
                                   * due to massive performance regressions.
                                   */
     /* LDM mode parameters */
diff --git a/lib/zstd.h b/lib/zstd.h
index b13e7e95998..b110874b708 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -400,7 +400,7 @@ typedef enum {
                                   * when a client can make use of partial documents (a prominent example being Chrome).
                                   * Note: this parameter is stable since v1.5.6.
                                   * It was present as an experimental parameter in earlier versions,
-                                  * but we don't recomment using it with earlier library versions
+                                  * but it's not recommended using it with earlier library versions
                                   * due to massive performance regressions.
                                   */
     /* LDM mode parameters */

From 6f1215b874dbf74b50dcb64915e91e11ba198008 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 18 Mar 2024 14:10:08 -0700
Subject: [PATCH 6/6] fix ZSTD_TARGETCBLOCKSIZE_MIN test

when requested CBlockSize is too low,
bound it to the minimum
instead of returning an error.
---
 lib/compress/zstd_compress.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 451f2f91e6f..38c91473ef9 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -870,7 +870,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
 #else
         FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
         CCtxParams->nbWorkers = value;
-        return CCtxParams->nbWorkers;
+        return (size_t)(CCtxParams->nbWorkers);
 #endif
 
     case ZSTD_c_jobSize :
@@ -894,7 +894,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
 #else
         FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
         CCtxParams->overlapLog = value;
-        return CCtxParams->overlapLog;
+        return (size_t)CCtxParams->overlapLog;
 #endif
 
     case ZSTD_c_rsyncable :
@@ -904,7 +904,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
 #else
         FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
         CCtxParams->rsyncable = value;
-        return CCtxParams->rsyncable;
+        return (size_t)CCtxParams->rsyncable;
 #endif
 
     case ZSTD_c_enableDedicatedDictSearch :
@@ -941,8 +941,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         return CCtxParams->ldmParams.hashRateLog;
 
     case ZSTD_c_targetCBlockSize :
-        if (value!=0)   /* 0 ==> default */
+        if (value!=0) {  /* 0 ==> default */
+            value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
             BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        }
         CCtxParams->targetCBlockSize = (U32)value;
         return CCtxParams->targetCBlockSize;
 
@@ -970,7 +972,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     case ZSTD_c_validateSequences:
         BOUNDCHECK(ZSTD_c_validateSequences, value);
         CCtxParams->validateSequences = value;
-        return CCtxParams->validateSequences;
+        return (size_t)CCtxParams->validateSequences;
 
     case ZSTD_c_useBlockSplitter:
         BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
@@ -985,7 +987,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     case ZSTD_c_deterministicRefPrefix:
         BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
         CCtxParams->deterministicRefPrefix = !!value;
-        return CCtxParams->deterministicRefPrefix;
+        return (size_t)CCtxParams->deterministicRefPrefix;
 
     case ZSTD_c_prefetchCDictTables:
         BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
@@ -995,7 +997,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     case ZSTD_c_enableSeqProducerFallback:
         BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
         CCtxParams->enableMatchFinderFallback = value;
-        return CCtxParams->enableMatchFinderFallback;
+        return (size_t)CCtxParams->enableMatchFinderFallback;
 
     case ZSTD_c_maxBlockSize:
         if (value!=0)    /* 0 ==> default */