From 68a4a034531df5bf2b895a2ca76c1bf629ee3415 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Apr 2023 06:00:54 +0000 Subject: [PATCH 001/283] Bump github/codeql-action from 2.2.8 to 2.2.9 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.2.8 to 2.2.9. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/67a35a08586135a9573f4327e904ecbf517a882d...04df1262e6247151b5ac09cd2c303ac36ad3f62b) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 8a693fa2025..63b76554b56 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@67a35a08586135a9573f4327e904ecbf517a882d # tag=v2.2.8 + uses: github/codeql-action/upload-sarif@04df1262e6247151b5ac09cd2c303ac36ad3f62b # tag=v2.2.9 with: sarif_file: results.sarif From da41d1d401fade89b868ec5306b2805460bcd909 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Apr 2023 06:00:58 +0000 Subject: [PATCH 002/283] Bump ossf/scorecard-action from 2.1.2 to 2.1.3 Bumps [ossf/scorecard-action](https://github.com/ossf/scorecard-action) from 2.1.2 to 2.1.3. 
- [Release notes](https://github.com/ossf/scorecard-action/releases) - [Changelog](https://github.com/ossf/scorecard-action/blob/main/RELEASE.md) - [Commits](https://github.com/ossf/scorecard-action/compare/e38b1902ae4f44df626f11ba0734b14fb91f8f86...80e868c13c90f172d68d1f4501dee99e2479f7af) --- updated-dependencies: - dependency-name: ossf/scorecard-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 8a693fa2025..a88ec949485 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -32,7 +32,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@e38b1902ae4f44df626f11ba0734b14fb91f8f86 # tag=v2.1.2 + uses: ossf/scorecard-action@80e868c13c90f172d68d1f4501dee99e2479f7af # tag=v2.1.3 with: results_file: results.sarif results_format: sarif From 520843d8ffeaed2f57035b7ec3c24d2dbe2e342f Mon Sep 17 00:00:00 2001 From: Kim Date: Wed, 5 Apr 2023 18:35:58 +1200 Subject: [PATCH 003/283] Add win32 to windows-artifacts.yml --- .github/workflows/windows-artifacts.yml | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml index 7d73b4b05ab..ddbeaa75638 100644 --- a/.github/workflows/windows-artifacts.yml +++ b/.github/workflows/windows-artifacts.yml @@ -49,3 +49,39 @@ jobs: with: path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-win64.zip name: zstd-${{ github.ref_name }}-win64.zip + + windows-32-artifacts: + # see https://ariya.io/2020/07/on-github-actions-with-msys2 + runs-on: windows-latest + defaults: + run: + shell: msys2 {0} + steps: + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: 
msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36 # tag=v2 + with: + msystem: MINGW32 + install: make zlib git p7zip mingw-w64-i686-gcc + update: true + - name: display versions + run: | + make -v + cc -v + - name: Building zlib to static link + run: | + git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib + make -C zlib -f win32/Makefile.gcc libz.a + - name: Building zstd programs + run: | + CPPFLAGS=-I../zlib LDFLAGS=../zlib/libz.a make -j allzstd MOREFLAGS=-static V=1 + - name: Create artifacts + run: | + ./lib/dll/example/build_package.bat + mv bin/ zstd-${{ github.ref_name }}-win32/ + 7z a -tzip -mx9 zstd-${{ github.ref_name }}-win32.zip zstd-${{ github.ref_name }}-win32/ + cd .. + - name: Publish zstd-$VERSION-win32.zip + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3 + with: + path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-win32.zip + name: zstd-${{ github.ref_name }}-win32.zip From a4fff8e0e81cb2e5ab44816b62b490cea3d4de0d Mon Sep 17 00:00:00 2001 From: Kim Date: Wed, 5 Apr 2023 20:22:29 +1200 Subject: [PATCH 004/283] Change to use strategy.matrix --- .github/workflows/windows-artifacts.yml | 61 +++++++------------------ 1 file changed, 16 insertions(+), 45 deletions(-) diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml index ddbeaa75638..4c15e54c18c 100644 --- a/.github/workflows/windows-artifacts.yml +++ b/.github/workflows/windows-artifacts.yml @@ -10,9 +10,15 @@ on: permissions: read-all jobs: - windows-64-artifacts: + windows-artifacts: # see https://ariya.io/2020/07/on-github-actions-with-msys2 runs-on: windows-latest + # see https://github.com/msys2/setup-msys2 + strategy: + matrix: + include: + - { msystem: mingw64, env: x86_64, ziparch: win64 } + - { msystem: mingw32, env: i686, ziparch: win32 } defaults: run: shell: msys2 {0} @@ -20,9 +26,10 @@ jobs: - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 - uses: 
msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36 # tag=v2 with: - msystem: MINGW64 - install: make zlib git p7zip mingw-w64-x86_64-gcc + msystem: ${{ matrix.msystem }} + install: make zlib git p7zip mingw-w64-${{matrix.env}}-gcc update: true + - name: display versions run: | make -v @@ -33,55 +40,19 @@ jobs: git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib make -C zlib -f win32/Makefile.gcc libz.a - - name: Building zstd programs in 64-bit mode + - name: Building zstd programs run: | CPPFLAGS=-I../zlib LDFLAGS=../zlib/libz.a make -j allzstd MOREFLAGS=-static V=1 - name: Create artifacts run: | ./lib/dll/example/build_package.bat - mv bin/ zstd-${{ github.ref_name }}-win64/ - 7z a -tzip -mx9 zstd-${{ github.ref_name }}-win64.zip zstd-${{ github.ref_name }}-win64/ + mv bin/ zstd-${{ github.ref_name }}-${{matrix.ziparch}}/ + 7z a -tzip -mx9 zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip zstd-${{ github.ref_name }}-${{matrix.ziparch}}/ cd .. - - name: Publish zstd-$VERSION-win64.zip - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3 - with: - path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-win64.zip - name: zstd-${{ github.ref_name }}-win64.zip - - windows-32-artifacts: - # see https://ariya.io/2020/07/on-github-actions-with-msys2 - runs-on: windows-latest - defaults: - run: - shell: msys2 {0} - steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 - - uses: msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36 # tag=v2 - with: - msystem: MINGW32 - install: make zlib git p7zip mingw-w64-i686-gcc - update: true - - name: display versions - run: | - make -v - cc -v - - name: Building zlib to static link - run: | - git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib - make -C zlib -f win32/Makefile.gcc libz.a - - name: Building zstd programs - run: | - CPPFLAGS=-I../zlib LDFLAGS=../zlib/libz.a make -j allzstd MOREFLAGS=-static V=1 - - name: Create 
artifacts - run: | - ./lib/dll/example/build_package.bat - mv bin/ zstd-${{ github.ref_name }}-win32/ - 7z a -tzip -mx9 zstd-${{ github.ref_name }}-win32.zip zstd-${{ github.ref_name }}-win32/ - cd .. - - name: Publish zstd-$VERSION-win32.zip + - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3 with: - path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-win32.zip - name: zstd-${{ github.ref_name }}-win32.zip + path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip + name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip From c28031df8f1809621407b5bc9c4b3e052872409f Mon Sep 17 00:00:00 2001 From: daniellerozenblit <48103643+daniellerozenblit@users.noreply.github.com> Date: Thu, 6 Apr 2023 13:01:58 -0400 Subject: [PATCH 005/283] Add new line + [no-] to mmap-dict help output (#3601) --- programs/zstdcli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index d2465456b56..66952aa8293 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -254,7 +254,7 @@ static void usage_advanced(const char* programName) DISPLAYOUT("\n"); DISPLAYOUT(" --format=zstd Compress files to the `.zst` format. [Default]\n"); - DISPLAYOUT(" --mmap-dict Memory-map dictionary file rather than mallocing and loading all at once"); + DISPLAYOUT(" --[no-]mmap-dict Memory-map dictionary file rather than mallocing and loading all at once\n"); #ifdef ZSTD_GZCOMPRESS DISPLAYOUT(" --format=gzip Compress files to the `.gz` format.\n"); #endif From dc88f7b8a0c154a555c3af997e18ec174cf2d3e6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Apr 2023 06:00:15 +0000 Subject: [PATCH 006/283] Bump github/codeql-action from 2.2.9 to 2.2.11 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.2.9 to 2.2.11. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/04df1262e6247151b5ac09cd2c303ac36ad3f62b...d186a2a36cc67bfa1b860e6170d37fb9634742c7) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index fe33ed2787f..9ece5ba51b9 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@04df1262e6247151b5ac09cd2c303ac36ad3f62b # tag=v2.2.9 + uses: github/codeql-action/upload-sarif@d186a2a36cc67bfa1b860e6170d37fb9634742c7 # tag=v2.2.11 with: sarif_file: results.sarif From d9582a0cb8070c78e8a53fba56b94a41936914aa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Apr 2023 06:00:18 +0000 Subject: [PATCH 007/283] Bump cygwin/cygwin-install-action from 3 to 4 Bumps [cygwin/cygwin-install-action](https://github.com/cygwin/cygwin-install-action) from 3 to 4. - [Release notes](https://github.com/cygwin/cygwin-install-action/releases) - [Commits](https://github.com/cygwin/cygwin-install-action/compare/f5e0f048310c425e84bc789f493a828c6dc80a25...006ad0b0946ca6d0a3ea2d4437677fa767392401) --- updated-dependencies: - dependency-name: cygwin/cygwin-install-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/dev-short-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml index e0e23f29242..940d883a5a8 100644 --- a/.github/workflows/dev-short-tests.yml +++ b/.github/workflows/dev-short-tests.yml @@ -503,7 +503,7 @@ jobs: steps: - run: git config --global core.autocrlf input - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 - - uses: cygwin/cygwin-install-action@f5e0f048310c425e84bc789f493a828c6dc80a25 # tag=master + - uses: cygwin/cygwin-install-action@006ad0b0946ca6d0a3ea2d4437677fa767392401 # tag=master with: platform: x86_64 packages: >- From e72e13ac6c1dc373a0826df0de6f9bf13ee02ee4 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 12 Apr 2023 16:00:28 -0700 Subject: [PATCH 008/283] [oss-fuzz] Fix simple_round_trip fuzzer with overlapping decompression When `ZSTD_c_maxBlockSize` is set, we weren't computing the decompression margin correctly, leading to `dstSize_tooSmall` errors. Fix that computation. This is just a bug in the fuzzer, not a bug in the library itself. 
Credit to OSS-Fuzz --- tests/fuzz/simple_round_trip.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 8b123197595..0fe46391764 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -27,7 +27,7 @@ static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; -static size_t getDecompressionMargin(void const* compressed, size_t cSize, size_t srcSize, int hasSmallBlocks) +static size_t getDecompressionMargin(void const* compressed, size_t cSize, size_t srcSize, int hasSmallBlocks, int maxBlockSize) { size_t margin = ZSTD_decompressionMargin(compressed, cSize); if (!hasSmallBlocks) { @@ -37,7 +37,12 @@ static size_t getDecompressionMargin(void const* compressed, size_t cSize, size_ ZSTD_frameHeader zfh; size_t marginM; FUZZ_ZASSERT(ZSTD_getFrameHeader(&zfh, compressed, cSize)); - marginM = ZSTD_DECOMPRESSION_MARGIN(srcSize, zfh.blockSizeMax); + if (maxBlockSize == 0) { + maxBlockSize = zfh.blockSizeMax; + } else { + maxBlockSize = MIN(maxBlockSize, (int)zfh.blockSizeMax); + } + marginM = ZSTD_DECOMPRESSION_MARGIN(srcSize, maxBlockSize); if (marginM < margin) margin = marginM; } @@ -52,12 +57,14 @@ static size_t roundTripTest(void *result, size_t resultCapacity, size_t cSize; size_t dSize; int targetCBlockSize = 0; + int maxBlockSize = 0; if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) { size_t const remainingBytes = FUZZ_dataProducer_remainingBytes(producer); FUZZ_setRandomParameters(cctx, srcSize, producer); cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); FUZZ_ZASSERT(cSize); FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_targetCBlockSize, &targetCBlockSize)); + FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize)); // Compress a second time and check for determinism { size_t const cSize0 = cSize; @@ -89,7 +96,7 @@ static size_t roundTripTest(void *result, size_t resultCapacity, 
FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, dSize), "Corruption!"); { - size_t margin = getDecompressionMargin(compressed, cSize, srcSize, targetCBlockSize); + size_t margin = getDecompressionMargin(compressed, cSize, srcSize, targetCBlockSize, maxBlockSize); size_t const outputSize = srcSize + margin; char* const output = (char*)FUZZ_malloc(outputSize); char* const input = output + outputSize - cSize; From 0a794163f4feccf2c408c206f37da5f5b0eab4de Mon Sep 17 00:00:00 2001 From: Michael Redig Date: Thu, 13 Apr 2023 18:43:06 -0500 Subject: [PATCH 009/283] add makefile entry to build fat binary on macos --- Makefile | 9 +++++++++ programs/.gitignore | 2 ++ 2 files changed, 11 insertions(+) diff --git a/Makefile b/Makefile index 3b2e3999f47..72a9480aab1 100644 --- a/Makefile +++ b/Makefile @@ -197,6 +197,15 @@ uninstall: travis-install: $(MAKE) install PREFIX=~/install_test_dir +.PHONY: clangbuild-darwin-fat +clangbuild-darwin-fat: clean + clang -v + CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation -arch arm64" $(MAKE) zstd-release + mv programs/zstd programs/zstd_arm64 + CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation -arch x86_64" $(MAKE) zstd-release + mv programs/zstd programs/zstd_x64 + lipo -create programs/zstd_x64 programs/zstd_arm64 -output programs/zstd + .PHONY: gcc5build gcc6build gcc7build clangbuild m32build armbuild aarch64build ppcbuild ppc64build gcc5build: clean gcc-5 -v diff --git a/programs/.gitignore b/programs/.gitignore index 2d4edbe45b1..42a7e30dc68 100644 --- a/programs/.gitignore +++ b/programs/.gitignore @@ -9,6 +9,8 @@ zstd-small zstd-nolegacy zstd-dictBuilder zstd-dll +zstd_arm64 +zstd_x64 # Object files *.o From 803e65f935d0e0faefb268341aa67124336bdb58 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Apr 2023 06:02:27 +0000 Subject: [PATCH 010/283] Bump actions/checkout from 3.5.0 to 3.5.2 Bumps 
[actions/checkout](https://github.com/actions/checkout) from 3.5.0 to 3.5.2. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/8f4b7f84864484a7bf31766abe9204da3cbe65b3...8e5e7e5ab8b370d6c329ec480221332ada57f0ab) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/dev-long-tests.yml | 50 ++++++------- .github/workflows/dev-short-tests.yml | 74 +++++++++---------- .../workflows/publish-release-artifacts.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/windows-artifacts.yml | 2 +- 5 files changed, 65 insertions(+), 65 deletions(-) diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml index 16202260d01..deef9f78066 100644 --- a/.github/workflows/dev-long-tests.yml +++ b/.github/workflows/dev-long-tests.yml @@ -15,7 +15,7 @@ jobs: make-all: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: make all run: make all @@ -26,7 +26,7 @@ jobs: DEVNULLRIGHTS: 1 READFROMBLOCKDEVICE: 1 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: make test run: make test @@ -34,7 +34,7 @@ jobs: make-test-osx: runs-on: macos-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: OS-X test run: make test # make -c lib all doesn't work because of the fact that it's not a tty @@ -45,7 +45,7 @@ jobs: DEVNULLRIGHTS: 1 READFROMBLOCKDEVICE: 1 steps: - - uses: 
actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: make test run: | sudo apt-get -qqq update @@ -55,21 +55,21 @@ jobs: no-intrinsics-fuzztest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: no intrinsics fuzztest run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest tsan-zstreamtest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: thread sanitizer zstreamtest run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream ubsan-zstreamtest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: undefined behavior sanitizer zstreamtest run: CC=clang make uasan-test-zstream @@ -77,7 +77,7 @@ jobs: tsan-fuzztest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: thread sanitizer fuzztest run: CC=clang make tsan-fuzztest @@ -85,7 +85,7 @@ jobs: big-tests-zstreamtest32: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: zstream tests in 32bit mode, with big tests run: | sudo apt-get -qqq update @@ -96,7 +96,7 @@ jobs: gcc-8-asan-ubsan-testzstd: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: gcc-8 + ASan + UBSan + Test Zstd # See 
https://askubuntu.com/a/1428822 run: | @@ -108,14 +108,14 @@ jobs: clang-asan-ubsan-testzstd: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3 + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 - name: clang + ASan + UBSan + Test Zstd run: CC=clang make -j uasan-test-zstd Date: Mon, 17 Apr 2023 15:43:27 -0700 Subject: [PATCH 011/283] added decoder errata paragraph for compressed blocks of size exactly 128 KB which used to be disallowed by the spec but have become allowed in more recent version of the spec. While this limitation is fixed in decoders v1.5.4+, implementers should refrain from generating such block with their custom encoder as they could be misclassified as corrupted by older decoder versions. --- doc/decompressor_errata.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/decompressor_errata.md b/doc/decompressor_errata.md index b162e7fd6e7..6c99cb01f46 100644 --- a/doc/decompressor_errata.md +++ b/doc/decompressor_errata.md @@ -6,12 +6,29 @@ Each entry will contain: 1. The last affected decompressor versions. 2. The decompressor components affected. 2. Whether the compressed frame could ever be produced by the reference compressor. -3. An example frame. +3. An example frame when it can be short enough to be displayed as hexadecimal 4. A description of the bug. The document is in reverse chronological order, with the bugs that affect the most recent zstd decompressor versions listed first. +Compressed block with a size of exactly 128 KB +------------------------------------------------ + +**Last affected version**: v1.5.2 + +**Affected decompressor component(s)**: Library & CLI + +**Produced by the reference compressor**: No + +The zstd decoder incorrectly rejected blocks of type `Compressed_Block` when their size was exactly 128 KB. + +This type of block was never generated by the reference compressor. 
+ +These blocks used to be disallowed by the spec up until spec version 0.3.2 when the restriction was lifted by [PR#1689](https://github.com/facebook/zstd/pull/1689). + +> A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block). + Compressed block with 0 literals and 0 sequences ------------------------------------------------ From 05434fe9a5d0d55650596e43171efcf1208c5c84 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 17 Apr 2023 15:50:31 -0700 Subject: [PATCH 012/283] removed travis & appveyor scripts we don't employ these CI systems anymore --- .travis.yml | 128 -------------------------------- appveyor.yml | 205 --------------------------------------------------- 2 files changed, 333 deletions(-) delete mode 100644 .travis.yml delete mode 100644 appveyor.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b96bf8ba217..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,128 +0,0 @@ -# Travis CI is used to test platforms that github-actions currently doesn't support -# without either self-hosting or some finnicky work-around. Also, some tests -# are troublesome to migrate since GH Actions runs tests not in a tty. 
-language: c - -git: - depth: 1 - -branches: - only: - - dev - - release - - master - - travisTest - -addons: - apt: - update: true - -env: - global: - - FUZZERTEST=-T1mn - ZSTREAM_TESTTIME=-T1mn - DECODECORPUS_TESTTIME=-T1mn - -matrix: - fast_finish: true - include: - - name: S390X (big endian) + Fuzz test - dist: trusty - arch: s390x - script: - - FUZZER_FLAGS=--no-big-tests make -C tests fuzztest - - - name: S390X (big endian) + Fuzz test + no intrinsics - dist: trusty - arch: s390x - script: - - MOREFLAGS="-DZSTD_NO_INTRINSICS" FUZZER_FLAGS=--no-big-tests make -C tests fuzztest - - - name: arm64 # ~2.5 mn - os: linux - arch: arm64 - script: - - make check - - - name: arm64fuzz - os: linux - arch: arm64 - script: - - make -C tests fuzztest - - # TODO: migrate to GH Actions once newest clang staticanalyze warnings are fixed - - name: static analyzer scanbuild # ~8mn - dist: trusty # note : it's important to pin down a version of static analyzer, since different versions report different false positives - script: - - make staticAnalyze - - # GH actions can't run this command on OS-X, non-tty issues - - name: OS-X make all lib - os: osx - script: - - make -C lib all - - # Introduced to check compat with old toolchains, to prevent e.g. #1872 - - name: ARM Build Test (on Trusty) - dist: trusty - script: - - make arminstall - - make armbuild - - # check release number (release/new tag only) - - name: Tag-Specific Test - if: tag =~ ^v[0-9]\.[0-9] - script: - - make -C tests checkTag - - tests/checkTag "$TRAVIS_BRANCH" - - - name: PPC64LE + Fuzz test # ~13mn - arch: ppc64le - env: - - FUZZER_FLAGS=--no-big-tests - - MOREFLAGS="-static" - script: - - cat /proc/cpuinfo - - make -C tests fuzztest - - # This test currently fails on GA specifically, for no obvious reason - # (it works fine on travisCI, and on local test platforms). 
- - name: Versions Compatibility Test # ~6mn - script: - - make -C tests versionsTest - - # meson dedicated test - - name: Focal (Meson + clang) # ~15mn - dist: focal - language: cpp - compiler: clang - install: - - sudo apt-get install -qq liblz4-dev valgrind tree - - | - travis_retry curl -o ~/ninja.zip -L 'https://github.com/ninja-build/ninja/releases/download/v1.9.0/ninja-linux.zip' && - unzip ~/ninja.zip -d ~/.local/bin - - | - travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/pip/3.6/get-pip.py' && - python3 ~/get-pip.py --user && - pip3 install --user meson - script: - - | - meson setup \ - --buildtype=debugoptimized \ - -Db_lundef=false \ - -Dauto_features=enabled \ - -Dbin_programs=true \ - -Dbin_tests=true \ - -Dbin_contrib=true \ - -Ddefault_library=both \ - build/meson builddir - - pushd builddir - - ninja - - meson test --verbose --no-rebuild - - DESTDIR=./staging ninja install - - tree ./staging - after_failure: - - cat "$TRAVIS_BUILD_DIR"/builddir/meson-logs/testlog.txt - - allow_failures: - - env: ALLOW_FAILURES=true diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index c58ef91a1f2..00000000000 --- a/appveyor.yml +++ /dev/null @@ -1,205 +0,0 @@ -# Following tests are run _only_ on `release` branch -# and on selected feature branch named `appveyorTest` or `visual*` - -- - version: 1.0.{build} - branches: - only: - - release - - master - - /appveyor*/ - - /visual*/ - environment: - matrix: - - COMPILER: "gcc" - HOST: "mingw" - PLATFORM: "x64" - SCRIPT: "make allzstd MOREFLAGS=-static" - ARTIFACT: "true" - BUILD: "true" - - COMPILER: "gcc" - HOST: "mingw" - PLATFORM: "x86" - SCRIPT: "make allzstd MOREFLAGS=-static" - ARTIFACT: "true" - BUILD: "true" - - - COMPILER: "clang-cl" - HOST: "cmake-visual" - PLATFORM: "x64" - CONFIGURATION: "Release" - CMAKE_GENERATOR: "Visual Studio 15 2017" - CMAKE_GENERATOR_PLATFORM: "x64" - CMAKE_GENERATOR_TOOLSET: "LLVM" - APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017" - - install: - - 
ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION% - - SET PATH_ORIGINAL=%PATH% - - if [%HOST%]==[mingw] ( - SET "PATH_MINGW32=C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin" && - SET "PATH_MINGW64=C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin" && - COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin\make.exe && - COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin\make.exe - ) - - IF [%HOST%]==[visual] IF [%PLATFORM%]==[x64] ( - SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;" - ) - - build_script: - - if [%HOST%]==[mingw] ( - ( if [%PLATFORM%]==[x64] ( - SET "PATH=%PATH_MINGW64%;%PATH_ORIGINAL%" - ) else if [%PLATFORM%]==[x86] ( - SET "PATH=%PATH_MINGW32%;%PATH_ORIGINAL%" - ) ) - ) - - if [%HOST%]==[mingw] if [%BUILD%]==[true] ( - make -v && - sh -c "%COMPILER% -v" && - ECHO Building zlib to static link && - SET "CC=%COMPILER%" && - sh -c "cd .. 
&& git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib" && - sh -c "cd ../zlib && make -f win32/Makefile.gcc libz.a" - ECHO Building zstd && - SET "CPPFLAGS=-I../../zlib" && - SET "LDFLAGS=../../zlib/libz.a" && - sh -c "%SCRIPT%" && - ( if [%COMPILER%]==[gcc] if [%ARTIFACT%]==[true] - ECHO Creating artifacts && - ECHO %cd% && - lib\dll\example\build_package.bat && - make -C programs DEBUGFLAGS= clean zstd && - cd programs\ && 7z a -tzip -mx9 zstd-win-binary-%PLATFORM%.zip zstd.exe && - appveyor PushArtifact zstd-win-binary-%PLATFORM%.zip && - cp zstd.exe ..\bin\zstd.exe && - git clone --depth 1 --branch release https://github.com/facebook/zstd && - cd zstd && - git archive --format=tar release -o zstd-src.tar && - ..\zstd -19 zstd-src.tar && - appveyor PushArtifact zstd-src.tar.zst && - certUtil -hashfile zstd-src.tar.zst SHA256 > zstd-src.tar.zst.sha256.sig && - appveyor PushArtifact zstd-src.tar.zst.sha256.sig && - cd ..\..\bin\ && - 7z a -tzip -mx9 zstd-win-release-%PLATFORM%.zip * && - appveyor PushArtifact zstd-win-release-%PLATFORM%.zip - ) - ) - - if [%HOST%]==[cmake-visual] ( - ECHO *** && - ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% && - PUSHD build\cmake && - cmake -DBUILD_TESTING=ON . && - cmake --build . --config %CONFIGURATION% -j4 && - POPD && - ECHO *** - ) - - test_script: - - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION% - - SET "CC=gcc" - - SET "CXX=g++" - - if [%TEST%]==[cmake] ( - mkdir build\cmake\build && - cd build\cmake\build && - SET FUZZERTEST=-T2mn && - SET ZSTREAM_TESTTIME=-T2mn && - cmake -G "Visual Studio 14 2015 Win64" .. && - cd ..\..\.. 
&& - make clean - ) - - -# The following tests are for regular pushes -# into `dev` or some feature branch -# There run less tests, for shorter feedback loop - -- - version: 1.0.{build} - environment: - matrix: - - COMPILER: "visual" - HOST: "visual" - PLATFORM: "x64" - CONFIGURATION: "Debug" - - COMPILER: "visual" - HOST: "visual" - PLATFORM: "Win32" - CONFIGURATION: "Debug" - - COMPILER: "visual" - HOST: "visual" - PLATFORM: "x64" - CONFIGURATION: "Release" - - COMPILER: "visual" - HOST: "visual" - PLATFORM: "Win32" - CONFIGURATION: "Release" - - - COMPILER: "gcc" - HOST: "cygwin" - PLATFORM: "x64" - - - COMPILER: "clang-cl" - HOST: "cmake-visual" - PLATFORM: "x64" - CONFIGURATION: "Release" - CMAKE_GENERATOR: "Visual Studio 15 2017" - CMAKE_GENERATOR_PLATFORM: "x64" - CMAKE_GENERATOR_TOOLSET: "LLVM" - APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017" - - install: - - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION% - - SET PATH_ORIGINAL=%PATH% - - if [%HOST%]==[cygwin] ( - ECHO Installing Cygwin Packages && - C:\cygwin64\setup-x86_64.exe -qnNdO -R "C:\cygwin64" -g -P ^ - gcc,^ - cmake,^ - make - ) - - IF [%HOST%]==[visual] IF [%PLATFORM%]==[x64] ( - SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;" - ) - - build_script: - - ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION% - - if [%HOST%]==[cygwin] ( - set CHERE_INVOKING=yes && - set CC=%COMPILER% && - C:\cygwin64\bin\bash --login -c " - set -e; - cd build/cmake; - CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T20s -DZSTD_ZSTREAM_FLAGS=-T20s -DZSTD_FULLBENCH_FLAGS=-i0 .; - make VERBOSE=1 -j; - ctest -V -L Medium; - " - ) - - if [%HOST%]==[cmake-visual] ( - ECHO *** && - ECHO *** Building %CMAKE_GENERATOR% 
^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% && - PUSHD build\cmake && - cmake -DBUILD_TESTING=ON . && - cmake --build . --config %CONFIGURATION% -j4 && - POPD && - ECHO *** - ) - - if [%HOST%]==[visual] ( - ECHO *** && - ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe && - msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe - ) - - - test_script: - - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION% - - SET "FUZZERTEST=-T10s" - - if [%HOST%]==[mingw] ( - set "CC=%COMPILER%" && - make clean && - make check - ) \ No newline at end of file From 0abf2baef925fed4dac13d551c35d817e3206fdd Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Apr 2023 14:27:06 -0700 Subject: [PATCH 013/283] Reduce streaming decompression memory by 128KB The split literals buffer patch increased streaming decompression memory by 64KB (shrunk lit buffer from 128KB to 64KB, and added 128KB). This patch removes the added 128KB buffer, because it isn't necessary. The buffer was there because the literals compression code didn't know the true `blockSizeMax` of the frame, and always put split literals so they ended 128KB - 32 from the beginning of the block. Instead, we can pass down the true `blockSizeMax` and ensure that the split literals end up at `blockSizeMax - 32` from the beginning of the block. 
We already reserve a full `blockSizeMax` bytes in streaming mode, so we won't be overwriting the extDict window. --- lib/decompress/zstd_decompress.c | 21 ++- lib/decompress/zstd_decompress_block.c | 220 ++++++++++++---------- lib/decompress/zstd_decompress_block.h | 2 +- lib/decompress/zstd_decompress_internal.h | 1 + tests/fullbench.c | 13 +- 5 files changed, 143 insertions(+), 114 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 7bc2713429d..94b90481b52 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -265,6 +265,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) #endif dctx->noForwardProgress = 0; dctx->oversizedDuration = 0; + dctx->isFrameDecompression = 1; #if DYNAMIC_BMI2 dctx->bmi2 = ZSTD_cpuSupportsBmi2(); #endif @@ -1003,7 +1004,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming); + assert(dctx->isFrameDecompression == 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming); break; case bt_raw : /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. 
*/ @@ -1319,7 +1321,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { case bt_compressed: DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming); + assert(dctx->isFrameDecompression == 1); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming); dctx->expected = 0; /* Streaming not supported */ break; case bt_raw : @@ -1548,6 +1551,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->litEntropy = dctx->fseEntropy = 0; dctx->dictID = 0; dctx->bType = bt_reserved; + dctx->isFrameDecompression = 1; ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ dctx->LLTptr = dctx->entropy.LLTable; @@ -1911,6 +1915,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) || (reset == ZSTD_reset_session_and_parameters) ) { dctx->streamStage = zdss_init; dctx->noForwardProgress = 0; + dctx->isFrameDecompression = 1; } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { @@ -1929,9 +1934,15 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) { - size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/ - unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2); + size_t const blockSize = (size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block + * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing + * 
the block at the beginning of the output buffer, and maintain a full window. + * + * We need another blockSize worth of buffer so that we can store split + * literals at the end of the block without overwriting the extDict window. + */ + unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2); unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 09896a931e2..5028a52f103 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -51,6 +51,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } * Block decoding ***************************************************************/ +static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx) +{ + size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX; + assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX); + return blockSizeMax; +} + /*! 
ZSTD_getcBlockSize() : * Provides the size of compressed block from block header `src` */ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, @@ -73,41 +80,49 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) { - if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) - { - /* room for litbuffer to fit without read faulting */ - dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); + assert(litSize <= blockSizeMax); + assert(dctx->isFrameDecompression || streaming == not_streaming); + assert(expectedWriteSize <= blockSizeMax); + if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) { + /* If we aren't streaming, we can just put the literals after the output + * of the current block. We don't need to worry about overwriting the + * extDict of our window, because it doesn't exist. + * So if we have space after the end of the block, just put it there. + */ + dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH; dctx->litBufferEnd = dctx->litBuffer + litSize; dctx->litBufferLocation = ZSTD_in_dst; - } - else if (litSize > ZSTD_LITBUFFEREXTRASIZE) - { - /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ + } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) { + /* Literals fit entirely within the extra buffer, put them there to avoid + * having to split the literals. 
+ */ + dctx->litBuffer = dctx->litExtraBuffer; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; + } else { + assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE); + /* Literals must be split between the output block and the extra lit + * buffer. We fill the extra lit buffer with the tail of the literals, + * and put the rest of the literals at the end of the block, with + * WILDCOPY_OVERLENGTH of buffer room to allow for overreads. + * This MUST not write more than our maxBlockSize beyond dst, because in + * streaming mode, that could overwrite part of our extDict window. + */ if (splitImmediately) { /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; - } - else { + } else { /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; } dctx->litBufferLocation = ZSTD_split; - } - else - { - /* fits entirely within litExtraBuffer, so no split is necessary */ - dctx->litBuffer = dctx->litExtraBuffer; - dctx->litBufferEnd = dctx->litBuffer + litSize; - dctx->litBufferLocation = ZSTD_not_in_dst; + assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize); } } -/* Hidden declaration for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize, - void* dst, size_t dstCapacity, const streaming_operation streaming); /*! ZSTD_decodeLiteralsBlock() : * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored * in the dstBuffer. 
If there is room to do so, it will be stored in full in the excess dst space after where the current @@ -116,7 +131,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, * * @return : nb of bytes read from src (< srcSize ) * note : symbol not declared but exposed for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, +static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ void* dst, size_t dstCapacity, const streaming_operation streaming) { @@ -125,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, { const BYTE* const istart = (const BYTE*) src; symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); switch(litEncType) { @@ -140,7 +156,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhc = MEM_readLE32(istart); size_t hufSuccess; - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); int const flags = 0 | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) | (dctx->disableHufAsm ? 
HUF_flags_disableAsm : 0); @@ -167,7 +183,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); if (!singleStream) RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, "Not enough literals (%zu) for the 4-streams mode (min %u)", @@ -214,10 +230,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } if (dctx->litBufferLocation == ZSTD_split) { + assert(litSize > ZSTD_LITBUFFEREXTRASIZE); ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; + assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax); } RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); @@ -232,7 +250,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, case set_basic: { size_t litSize, lhSize; U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); switch(lhlCode) { case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -251,6 +269,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ 
@@ -279,7 +298,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, case set_rle: { U32 const lhlCode = ((istart[0]) >> 2) & 3; size_t litSize, lhSize; - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); switch(lhlCode) { case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -298,7 +317,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); if (dctx->litBufferLocation == ZSTD_split) @@ -320,6 +339,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } } +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, + const void* src, size_t srcSize, + void* dst, size_t dstCapacity); +size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, + const void* src, size_t srcSize, + void* dst, size_t dstCapacity) +{ + dctx->isFrameDecompression = 0; + return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming); +} + /* Default FSE distribution tables. 
* These are pre-calculated FSE decoding tables using default distributions as defined in specification : * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions @@ -1320,22 +1351,24 @@ MEM_STATIC void ZSTD_assertValidSequence( BYTE const* prefixStart, BYTE const* virtualStart) { #if DEBUGLEVEL >= 1 - size_t const windowSize = dctx->fParams.windowSize; - size_t const sequenceSize = seq.litLength + seq.matchLength; - BYTE const* const oLitEnd = op + seq.litLength; - DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - assert(op <= oend); - assert((size_t)(oend - op) >= sequenceSize); - assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); - if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { - size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); - /* Offset must be within the dictionary. */ - assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); - assert(seq.offset <= windowSize + dictSize); - } else { - /* Offset must be within our window. */ - assert(seq.offset <= windowSize); + if (dctx->isFrameDecompression) { + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_blockSizeMax(dctx)); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. 
*/ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. */ + assert(seq.offset <= windowSize); + } } #else (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; @@ -1351,8 +1384,7 @@ DONT_VECTORIZE ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -1365,7 +1397,6 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); - (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1453,7 +1484,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif if (UNLIKELY(ZSTD_isError(oneSeqSize))) return oneSeqSize; @@ -1482,7 +1513,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, 
vBase); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif if (UNLIKELY(ZSTD_isError(oneSeqSize))) return oneSeqSize; @@ -1519,7 +1550,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif if (UNLIKELY(ZSTD_isError(oneSeqSize))) return oneSeqSize; @@ -1568,8 +1599,7 @@ DONT_VECTORIZE ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -1582,7 +1612,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq); - (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1621,7 +1650,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif if (UNLIKELY(ZSTD_isError(oneSeqSize))) return oneSeqSize; @@ -1656,20 +1685,18 @@ static size_t ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, void* dst, size_t 
maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } static size_t ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1697,8 +1724,7 @@ ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -1710,7 +1736,6 @@ ZSTD_decompressSequencesLong_body( const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1764,7 +1789,7 @@ ZSTD_decompressSequencesLong_body( oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & 
STORED_SEQS_MASK], prefixStart, dictStart); + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); #endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; @@ -1780,7 +1805,7 @@ ZSTD_decompressSequencesLong_body( ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); #endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; @@ -1812,7 +1837,7 @@ ZSTD_decompressSequencesLong_body( size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); #endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; @@ -1825,7 +1850,7 @@ ZSTD_decompressSequencesLong_body( ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); #endif if 
(ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; @@ -1863,10 +1888,9 @@ static size_t ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1880,20 +1904,18 @@ DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } static BMI2_TARGET_ATTRIBUTE size_t DONT_VECTORIZE ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1902,10 +1924,9 @@ static BMI2_TARGET_ATTRIBUTE size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return 
ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1915,37 +1936,34 @@ typedef size_t (*ZSTD_decompressSequences_t)( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame); + const ZSTD_longOffset_e isLongOffset); #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { DEBUGLOG(5, "ZSTD_decompressSequences"); #if DYNAMIC_BMI2 if (ZSTD_DCtx_get_bmi2(dctx)) { - return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } static size_t ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); #if DYNAMIC_BMI2 if (ZSTD_DCtx_get_bmi2(dctx)) { - return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif - return 
ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1960,16 +1978,15 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 if (ZSTD_DCtx_get_bmi2(dctx)) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -2051,20 +2068,20 @@ static size_t ZSTD_maxShortOffset(void) size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame, const streaming_operation streaming) + const void* src, size_t srcSize, const streaming_operation streaming) { /* blockType == blockCompressed */ const BYTE* ip = (const BYTE*)src; DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); /* Note : the wording of the specification - * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX. + * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx). * This generally does not happen, as it makes little sense, * since an uncompressed block would feature same size and have no decompression cost. 
* Also, note that decoder from reference libzstd before < v1.5.4 * would consider this edge case as an error. - * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX + * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx) * for broader compatibility with the deployed ecosystem of zstd decoders */ - RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); + RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, ""); /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); @@ -2079,7 +2096,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, /* Compute the maximum block size, which must also work when !frame and fParams are unset. * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. */ - size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX)); + size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx)); size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart); /* isLongOffset must be true if there are long offsets. * Offsets are long if they are larger than ZSTD_maxShortOffset(). 
@@ -2145,16 +2162,16 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, { #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); #endif } #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ if (dctx->litBufferLocation == ZSTD_split) - return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); else - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); #endif } } @@ -2176,8 +2193,9 @@ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { size_t dSize; + dctx->isFrameDecompression = 0; ZSTD_checkContinuity(dctx, dst, dstCapacity); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming); dctx->previousDstEnd = (char*)dst + dSize; return dSize; } diff --git a/lib/decompress/zstd_decompress_block.h b/lib/decompress/zstd_decompress_block.h index 9d1318882d0..ab152404ba0 100644 --- a/lib/decompress/zstd_decompress_block.h +++ b/lib/decompress/zstd_decompress_block.h @@ -47,7 +47,7 @@ typedef enum { */ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame, const streaming_operation streaming); + const void* src, size_t srcSize, const streaming_operation streaming); /* ZSTD_buildFSETable() : * generate FSE decoding table for one symbol (ll, ml or off) diff --git a/lib/decompress/zstd_decompress_internal.h 
b/lib/decompress/zstd_decompress_internal.h index c2ec5d9fbef..25aaebe5001 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -153,6 +153,7 @@ struct ZSTD_DCtx_s size_t litSize; size_t rleSize; size_t staticSize; + int isFrameDecompression; #if DYNAMIC_BMI2 != 0 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ #endif diff --git a/tests/fullbench.c b/tests/fullbench.c index 41bd26d0410..0391107b993 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -141,15 +141,14 @@ static size_t local_ZSTD_decompress(const void* src, size_t srcSize, static ZSTD_DCtx* g_zdc = NULL; #ifndef ZSTD_DLL_IMPORT -typedef enum { - not_streaming = 0, - is_streaming = 1 -} streaming_operation; -extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize, void* dst, size_t dstCapacity, const streaming_operation streaming); + +extern size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, + const void* src, size_t srcSize, + void* dst, size_t dstCapacity); static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { (void)src; (void)srcSize; (void)dst; (void)dstSize; - return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize, dst, dstSize, not_streaming); + return ZSTD_decodeLiteralsBlock_wrapper(g_zdc, buff2, g_cSize, dst, dstSize); } static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) @@ -606,7 +605,7 @@ static int benchMem(unsigned benchNb, ip += ZSTD_blockHeaderSize; /* skip block header */ ZSTD_decompressBegin(g_zdc); CONTROL(iend > ip); - ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip), dstBuff, dstBuffSize, not_streaming); /* skip literal segment */ + ip += ZSTD_decodeLiteralsBlock_wrapper(g_zdc, ip, (size_t)(iend-ip), dstBuff, dstBuffSize); /* skip literal segment */ g_cSize = 
(size_t)(iend-ip); memcpy(dstBuff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */ srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */ From 61efb2a047b308b6f0c265e1eae9ca8a062268e4 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Apr 2023 17:06:24 -0700 Subject: [PATCH 014/283] Add ZSTD_d_maxBlockSize parameter Reduces memory when blocks are guaranteed to be smaller than allowed by the format. This is useful for streaming compression in conjunction with ZSTD_c_maxBlockSize. This PR saves 2 * (formatMaxBlockSize - paramMaxBlockSize) when streaming. Once it is rebased on top of PR #3616 it will save 3 * (formatMaxBlockSize - paramMaxBlockSize). --- lib/decompress/zstd_decompress.c | 29 ++++++++- lib/decompress/zstd_decompress_internal.h | 1 + lib/zstd.h | 20 +++++- tests/fuzz/simple_round_trip.c | 3 + tests/fuzz/stream_round_trip.c | 24 +++++++- tests/fuzzer.c | 19 ++++++ tests/zstreamtest.c | 75 +++++++++++++++++++++++ 7 files changed, 165 insertions(+), 6 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 94b90481b52..94eb95151b5 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -245,6 +245,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; dctx->disableHufAsm = 0; + dctx->maxBlockSizeParam = 0; } static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) @@ -972,6 +973,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; } + /* Shrink the blockSizeMax if enabled */ + if (dctx->maxBlockSizeParam != 0) + dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam); + /* Loop on each block */ while (1) { BYTE* oBlockEnd = oend; @@ -1823,6 +1828,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.lowerBound = 0; 
bounds.upperBound = 1; return bounds; + case ZSTD_d_maxBlockSize: + bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; + bounds.upperBound = ZSTD_BLOCKSIZE_MAX; + return bounds; default:; } @@ -1867,6 +1876,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value case ZSTD_d_disableHuffmanAssembly: *value = (int)dctx->disableHufAsm; return 0; + case ZSTD_d_maxBlockSize: + *value = dctx->maxBlockSizeParam; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1904,6 +1916,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value); dctx->disableHufAsm = value != 0; return 0; + case ZSTD_d_maxBlockSize: + if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value); + dctx->maxBlockSizeParam = value; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1932,9 +1948,9 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) return ZSTD_sizeof_DCtx(dctx); } -size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax) { - size_t const blockSize = (size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax); /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing * the block at the beginning of the output buffer, and maintain a full window. 
@@ -1950,6 +1966,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long return minRBSize; } +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX); +} + size_t ZSTD_estimateDStreamSize(size_t windowSize) { size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); @@ -2188,11 +2209,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, frameParameter_windowTooLarge, ""); + if (zds->maxBlockSizeParam != 0) + zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam); /* Adapt buffer sizes to frame header instructions */ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered - ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax) : 0; ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 25aaebe5001..83a7a0115fd 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -167,6 +167,7 @@ struct ZSTD_DCtx_s ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ int disableHufAsm; + int maxBlockSizeParam; /* streaming */ ZSTD_dStreamStage streamStage; diff --git a/lib/zstd.h b/lib/zstd.h index e5c3f8b68b7..e94f1018970 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -618,6 +618,7 @@ typedef enum { * ZSTD_d_forceIgnoreChecksum * ZSTD_d_refMultipleDDicts * ZSTD_d_disableHuffmanAssembly + * ZSTD_d_maxBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly */ @@ -625,7 +626,8 @@ typedef enum { ZSTD_d_experimentalParam2=1001, ZSTD_d_experimentalParam3=1002, ZSTD_d_experimentalParam4=1003, - ZSTD_d_experimentalParam5=1004 + ZSTD_d_experimentalParam5=1004, + ZSTD_d_experimentalParam6=1005 } ZSTD_dParameter; @@ -2430,6 +2432,22 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete */ #define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5 +/* ZSTD_d_maxBlockSize + * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). + * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. + * + * Forces the decompressor to reject blocks whose content size is + * larger than the configured maxBlockSize. When maxBlockSize is + * larger than the windowSize, the windowSize is used instead. + * This saves memory on the decoder when you know all blocks are small. + * + * This option is typically used in conjunction with ZSTD_c_maxBlockSize. + * + * WARNING: This causes the decoder to reject otherwise valid frames + * that have block sizes larger than the configured maxBlockSize. + */ +#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6 + /*! ZSTD_DCtx_setFormat() : * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). 
diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 0fe46391764..660092e6106 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -90,6 +90,9 @@ static size_t roundTripTest(void *result, size_t resultCapacity, FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0); } } + if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) { + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, maxBlockSize)); + } dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); FUZZ_ZASSERT(dSize); FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size"); diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c index 7d277a857a6..c2d6707a1ce 100644 --- a/tests/fuzz/stream_round_trip.c +++ b/tests/fuzz/stream_round_trip.c @@ -63,6 +63,8 @@ static size_t compress(uint8_t *dst, size_t capacity, size_t dstSize = 0; ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); FUZZ_setRandomParameters(cctx, srcSize, producer); + int maxBlockSize; + FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize)); while (srcSize > 0) { ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer); @@ -93,6 +95,8 @@ static size_t compress(uint8_t *dst, size_t capacity, if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) { size_t const remaining = in.size - in.pos; FUZZ_setRandomParameters(cctx, remaining, producer); + /* Always use the same maxBlockSize */ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, maxBlockSize)); } mode = -1; } @@ -132,6 +136,23 @@ static size_t compress(uint8_t *dst, size_t capacity, return dstSize; } +size_t decompress(void* dst, size_t dstCapacity, void const* src, size_t srcSize, FUZZ_dataProducer_t* producer) +{ + ZSTD_inBuffer in = {src, srcSize, 0}; + ZSTD_outBuffer out = {dst, dstCapacity, 0}; + int maxBlockSize; + FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize)); + if 
(FUZZ_dataProducer_uint32Range(producer, 0, 1)) { + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, maxBlockSize)); + } + while (in.pos < in.size) { + size_t const ret = ZSTD_decompressStream(dctx, &out, &in); + FUZZ_ZASSERT(ret); + FUZZ_ASSERT(ret == 0); + } + return out.pos; +} + int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { FUZZ_SEQ_PROD_SETUP(); @@ -163,8 +184,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { size_t const cSize = compress(cBuf, neededBufSize, src, size, producer); - size_t const rSize = - ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize); + size_t const rSize = decompress(rBuf, neededBufSize, cBuf, cSize, producer); FUZZ_ZASSERT(rSize); FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 07ddfefd6db..b049a9e09ba 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -952,6 +952,25 @@ static int basicUnitTests(U32 const seed, double compressibility) ZSTD_freeCDict(cdict); ZSTD_freeCCtx(cctx); } + + DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2K", testNb++); + { + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 2048)); + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 2048)); + + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK_Z(cSize); + CHECK_Z(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize)); + + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 1024)); + CHECK(ZSTD_isError(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize))); + + ZSTD_freeDCtx(dctx); + ZSTD_freeCCtx(cctx); + } DISPLAYLEVEL(3, "test%3i : ldm fill dict out-of-bounds check", testNb++); { diff --git a/tests/zstreamtest.c 
b/tests/zstreamtest.c index 14c4af82fb7..85d0fc81fe0 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -722,6 +722,67 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2KB : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t singlePassSize, streamingSize, streaming2KSize; + + { + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 2048)); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize); + CHECK_Z(cSize); + ZSTD_freeCCtx(cctx); + } + + CHECK_Z(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBufferSize, compressedBuffer, cSize)); + singlePassSize = ZSTD_sizeof_DCtx(dctx); + CHECK_Z(singlePassSize); + + inBuff.src = compressedBuffer; + inBuff.size = cSize; + + outBuff.dst = decodedBuffer; + outBuff.size = decodedBufferSize; + + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 2048)); + inBuff.pos = 0; + outBuff.pos = 0; + { + size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff); + CHECK_Z(r); + CHECK(r != 0, "Entire frame must be decompressed"); + } + streaming2KSize = ZSTD_sizeof_DCtx(dctx); + CHECK_Z(streaming2KSize); + + CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + inBuff.pos = 0; + outBuff.pos = 0; + { + size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff); + CHECK_Z(r); + CHECK(r != 0, "Entire frame must be decompressed"); + } + streamingSize = ZSTD_sizeof_DCtx(dctx); + CHECK_Z(streamingSize); + + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 1024)); + inBuff.pos = 0; + outBuff.pos = 0; + CHECK(!ZSTD_isError(ZSTD_decompressStream(dctx, &outBuff, &inBuff)), "decompression must fail"); + + 
CHECK(streamingSize < singlePassSize + (1 << 18) + 3 * ZSTD_BLOCKSIZE_MAX, "Streaming doesn't use the right amount of memory"); + CHECK(streamingSize != streaming2KSize + 3 * (ZSTD_BLOCKSIZE_MAX - 2048), "ZSTD_d_blockSizeMax didn't save the right amount of memory"); + DISPLAYLEVEL(3, "| %zu | %zu | %zu | ", singlePassSize, streaming2KSize, streamingSize); + + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + /* Decompression with ZSTD_d_stableOutBuffer */ cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize, 1); CHECK_Z(cSize); @@ -2845,6 +2906,13 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest, if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) ); if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_deterministicRefPrefix, FUZ_rand(&lseed) & 1, opaqueAPI) ); + /* Set max block size parameters */ + if (FUZ_rand(&lseed) & 1) { + int maxBlockSize = (int)(FUZ_rand(&lseed) % ZSTD_BLOCKSIZE_MAX); + maxBlockSize = MAX(1024, maxBlockSize); + CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_maxBlockSize, maxBlockSize, opaqueAPI) ); + } + /* Apply parameters */ if (opaqueAPI) { DISPLAYLEVEL(5, "t%u: applying CCtxParams \n", testNb); @@ -2976,6 +3044,13 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest, if (FUZ_rand(&lseed) & 1) { CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_disableHuffmanAssembly, FUZ_rand(&lseed) & 1)); } + if (FUZ_rand(&lseed) & 1) { + int maxBlockSize; + CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_maxBlockSize, &maxBlockSize)); + CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_maxBlockSize, maxBlockSize)); + } else { + CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_maxBlockSize, 0)); + } { size_t decompressionResult = 1; ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; From 0d6954b4cc309b430dd010dbeb20b112e7092644 Mon Sep 17 00:00:00 2001 From: 
Yann Collet Date: Wed, 19 Apr 2023 00:24:35 -0700 Subject: [PATCH 015/283] added golden file for the new decompressor erratum --- doc/decompressor_errata.md | 5 ++++- doc/educational_decoder/zstd_decompress.c | 2 +- tests/golden-decompression/block-128k.zst | Bin 0 -> 131081 bytes 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 tests/golden-decompression/block-128k.zst diff --git a/doc/decompressor_errata.md b/doc/decompressor_errata.md index 6c99cb01f46..e170a62c155 100644 --- a/doc/decompressor_errata.md +++ b/doc/decompressor_errata.md @@ -6,7 +6,7 @@ Each entry will contain: 1. The last affected decompressor versions. 2. The decompressor components affected. 2. Whether the compressed frame could ever be produced by the reference compressor. -3. An example frame when it can be short enough to be displayed as hexadecimal +3. An example frame (hexadecimal string when it can be short enough, link to golden file otherwise) 4. A description of the bug. The document is in reverse chronological order, with the bugs that affect the most recent zstd decompressor versions listed first. @@ -21,7 +21,10 @@ Compressed block with a size of exactly 128 KB **Produced by the reference compressor**: No +**Example Frame**: see zstd/tests/golden-decompression/block-128k.zst + The zstd decoder incorrectly rejected blocks of type `Compressed_Block` when their size was exactly 128 KB. +Note that `128 KB - 1` was accepted, and `128 KB + 1` is forbidden by the spec. This type of block was never generated by the reference compressor. 
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c index 9ade7650268..3196b78dc38 100644 --- a/doc/educational_decoder/zstd_decompress.c +++ b/doc/educational_decoder/zstd_decompress.c @@ -1399,7 +1399,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) { /******* END OUTPUT SIZE COUNTING *********************************************/ /******* DICTIONARY PARSING ***************************************************/ -dictionary_t* create_dictionary() { +dictionary_t* create_dictionary(void) { dictionary_t* const dict = calloc(1, sizeof(dictionary_t)); if (!dict) { BAD_ALLOC(); diff --git a/tests/golden-decompression/block-128k.zst b/tests/golden-decompression/block-128k.zst new file mode 100644 index 0000000000000000000000000000000000000000..cdaeae39d106fa58bd60e8b167bd4d7704bd493c GIT binary patch literal 131081 zcmeIuu@L|e00gja^fu7yv=+C8l^kb}WQ6oJPG(8nw*>YwAz<>b*1`HT5V8DO@ z0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VK zfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5 zV8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM z7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b* z1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd z0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwA zz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEj zFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r z3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@ z0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VK mfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM{DB9JumnK> literal 0 HcmV?d00001 From 4c25ea329b851e1d2e45c2a91e0d5d79a3ad3be0 Mon Sep 17 00:00:00 2001 From: Daniel Kutenin Date: Thu, 20 Apr 2023 11:14:08 +0100 Subject: [PATCH 016/283] Disable unused variable warning in msan configurations --- lib/compress/zstd_cwksp.h | 1 + 1 file changed, 1 insertion(+) diff --git 
a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h index cc7fb1c715c..cfe808f1505 100644 --- a/lib/compress/zstd_cwksp.h +++ b/lib/compress/zstd_cwksp.h @@ -192,6 +192,7 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { { intptr_t const offset = __msan_test_shadow(ws->initOnceStart, (U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart); + (void)offset; #if defined(ZSTD_MSAN_PRINT) if(offset!=-1) { __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32); From be489f78df642cf8fd40fcfa59ec700cb494a1b5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Apr 2023 06:03:24 +0000 Subject: [PATCH 017/283] Bump github/codeql-action from 2.2.11 to 2.3.0 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.2.11 to 2.3.0. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/d186a2a36cc67bfa1b860e6170d37fb9634742c7...b2c19fb9a2a485599ccf4ed5d65527d94bc57226) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 5083378f482..e5f31fbac41 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@d186a2a36cc67bfa1b860e6170d37fb9634742c7 # tag=v2.2.11 + uses: github/codeql-action/upload-sarif@b2c19fb9a2a485599ccf4ed5d65527d94bc57226 # tag=v2.3.0 with: sarif_file: results.sarif From 6ec18aed31a955ce7ce04403538f7539cd57eb56 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 26 Apr 2023 12:45:23 -0700 Subject: [PATCH 018/283] minor : update streaming_compression example display a warning when requesting multi-threading while linking to a library that doesn't support multi-threading. --- examples/streaming_compression.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c index ed0a3a69cd9..063aa82a294 100644 --- a/examples/streaming_compression.c +++ b/examples/streaming_compression.c @@ -42,7 +42,13 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve */ CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) ); CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) ); - ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads); + if (nbThreads > 1) { + size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads); + if (ZSTD_isError(r)) { + fprintf (stderr, "Note: the linked libzstd library doesn't support multithreading. " + "Reverting to single-thread mode. \n"); + } + } /* This loop read from the input file, compresses that entire chunk, * and writes all output produced to the output file. 
@@ -117,7 +123,7 @@ int main(int argc, const char** argv) } int cLevel = 1; - int nbThreads = 4; + int nbThreads = 1; if (argc >= 3) { cLevel = atoi (argv[2]); From 2a5076d26481fddb22f1e589c1d1666b0a7456d6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 May 2023 06:04:02 +0000 Subject: [PATCH 019/283] Bump github/codeql-action from 2.3.0 to 2.3.2 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.3.0 to 2.3.2. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/b2c19fb9a2a485599ccf4ed5d65527d94bc57226...f3feb00acb00f31a6f60280e6ace9ca31d91c76a) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index e5f31fbac41..58a2737280b 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@b2c19fb9a2a485599ccf4ed5d65527d94bc57226 # tag=v2.3.0 + uses: github/codeql-action/upload-sarif@f3feb00acb00f31a6f60280e6ace9ca31d91c76a # tag=v2.3.2 with: sarif_file: results.sarif From cbf3e263160e0bfc9499f55f34c3759a14c0c1cc Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 19 Apr 2023 11:41:51 -0400 Subject: [PATCH 020/283] Allow `ZSTD_selectBlockCompressor()` to Return NULL Return an error rather than segfaulting. 
--- lib/compress/zstd_compress.c | 17 +++++++++++------ lib/compress/zstd_ldm.c | 2 ++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d6133e70b44..c28b434562d 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3280,22 +3280,27 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } /* Fallback to software matchfinder */ - { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, - zc->appliedParams.useRowMatchFinder, - dictMode); + { ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor( + zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); ms->ldmSeqStore = NULL; DEBUGLOG( 5, "External sequence producer returned error code %lu. Falling back to internal parser.", (unsigned long)nbExternalSeqs ); + RETURN_ERROR_IF(blockCompressor == NULL, parameter_combination_unsupported, "Got NULL block compressor!"); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } } } else { /* not long range mode and no external matchfinder */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, - zc->appliedParams.useRowMatchFinder, - dictMode); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor( + zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); ms->ldmSeqStore = NULL; + RETURN_ERROR_IF(blockCompressor == NULL, parameter_combination_unsupported, "Got NULL block compressor!"); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 3d74ff19e3c..ec0690ccbf5 100644 --- a/lib/compress/zstd_ldm.c +++ 
b/lib/compress/zstd_ldm.c @@ -672,6 +672,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, /* Input positions */ BYTE const* ip = istart; + RETURN_ERROR_IF(blockCompressor == NULL, parameter_combination_unsupported, "Got NULL block compressor!"); + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); /* If using opt parser, use LDMs only as candidates rather than always accepting them */ if (cParams->strategy >= ZSTD_btopt) { From 81b86a2024c80c1fc69bb6a76407628e063917ed Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 19 Apr 2023 12:19:56 -0400 Subject: [PATCH 021/283] NULL Out Block Compressor Table Entries When Excluded Don't check about excluding `ZSTD_fast`. It's always included so that we know we can resolve downwards and hit a strategy that's present. --- lib/compress/zstd_compress.c | 191 ++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 15 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c28b434562d..08bfd54c322 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2992,40 +2992,145 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { { ZSTD_compressBlock_fast /* default for 0 */, ZSTD_compressBlock_fast, +#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_doubleFast, +#endif +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_greedy, +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_lazy, +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_lazy2, +#endif +#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_btlazy2, +#endif +#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_btopt, +#endif +#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR + NULL, +#else 
ZSTD_compressBlock_btultra, - ZSTD_compressBlock_btultra2 }, +#endif +#ifdef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR + NULL +#else + ZSTD_compressBlock_btultra2 +#endif + }, { ZSTD_compressBlock_fast_extDict /* default for 0 */, ZSTD_compressBlock_fast_extDict, +#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_doubleFast_extDict, +#endif +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_greedy_extDict, +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_lazy_extDict, +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_lazy2_extDict, +#endif +#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_btlazy2_extDict, +#endif +#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_btopt_extDict, +#endif +#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR + NULL, + NULL +#else ZSTD_compressBlock_btultra_extDict, - ZSTD_compressBlock_btultra_extDict }, + ZSTD_compressBlock_btultra_extDict +#endif + }, { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, ZSTD_compressBlock_fast_dictMatchState, +#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_doubleFast_dictMatchState, +#endif +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_greedy_dictMatchState, +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_lazy_dictMatchState, +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_lazy2_dictMatchState, +#endif +#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_btlazy2_dictMatchState, +#endif +#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_btopt_dictMatchState, +#endif +#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR + NULL, + NULL +#else ZSTD_compressBlock_btultra_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState }, + 
ZSTD_compressBlock_btultra_dictMatchState +#endif + }, { NULL /* default for 0 */, NULL, NULL, +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_greedy_dedicatedDictSearch, +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_lazy_dedicatedDictSearch, +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + NULL, +#else ZSTD_compressBlock_lazy2_dedicatedDictSearch, +#endif NULL, NULL, NULL, @@ -3038,18 +3143,74 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { - { ZSTD_compressBlock_greedy_row, - ZSTD_compressBlock_lazy_row, - ZSTD_compressBlock_lazy2_row }, - { ZSTD_compressBlock_greedy_extDict_row, - ZSTD_compressBlock_lazy_extDict_row, - ZSTD_compressBlock_lazy2_extDict_row }, - { ZSTD_compressBlock_greedy_dictMatchState_row, - ZSTD_compressBlock_lazy_dictMatchState_row, - ZSTD_compressBlock_lazy2_dictMatchState_row }, - { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, - ZSTD_compressBlock_lazy_dedicatedDictSearch_row, - ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } + { +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_greedy_row, +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_lazy_row, +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_lazy2_row +#endif + }, + { +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_greedy_extDict_row, +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_lazy_extDict_row, +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_lazy2_extDict_row +#endif + }, + { +#ifdef 
ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_greedy_dictMatchState_row, +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_lazy_dictMatchState_row, +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_lazy2_dictMatchState_row +#endif + }, + { +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_greedy_dedicatedDictSearch_row, +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_lazy_dedicatedDictSearch_row, +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + NULL, +#else + ZSTD_compressBlock_lazy2_dedicatedDictSearch_row +#endif + } }; DEBUGLOG(4, "Selecting a row-based matchfinder"); assert(useRowMatchFinder != ZSTD_ps_auto); From 50cdf84f58e1d8f989877db453fdfe9d63c1925e Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 19 Apr 2023 15:38:04 -0400 Subject: [PATCH 022/283] Macro-Exclude Block Compressors from Declaration/Definition --- lib/compress/zstd_compress.c | 11 ++++++ lib/compress/zstd_double_fast.c | 4 ++ lib/compress/zstd_double_fast.h | 3 ++ lib/compress/zstd_lazy.c | 68 +++++++++++++++++++++++++++++++-- lib/compress/zstd_lazy.h | 59 ++++++++++++++++++++++++++++ lib/compress/zstd_ldm.c | 2 + lib/compress/zstd_opt.c | 22 +++++++++++ lib/compress/zstd_opt.h | 19 +++++++++ 8 files changed, 185 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 08bfd54c322..52d1c1963b8 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4926,12 +4926,17 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_fillHashTable(ms, iend, dtlm, tfp); break; case ZSTD_dfast: +#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp); +#endif break; case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: +#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + || 
!defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) assert(srcSize >= HASH_READ_SIZE); if (ms->dedicatedDictSearch) { assert(ms->chainTable != NULL); @@ -4948,14 +4953,20 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, DEBUGLOG(4, "Using chain-based hash table for lazy dict"); } } +#endif break; case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ case ZSTD_btopt: case ZSTD_btultra: case ZSTD_btultra2: +#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) assert(srcSize >= HASH_READ_SIZE); ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); +#endif break; default: diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 0ad88ffc7bd..d4544b39051 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -11,6 +11,8 @@ #include "zstd_compress_internal.h" #include "zstd_double_fast.h" +#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms, void const* end, ZSTD_dictTableLoadMethod_e dtlm) { @@ -756,3 +758,5 @@ size_t ZSTD_compressBlock_doubleFast_extDict( return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); } } + +#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h index 6f0047c4ba7..f1a86d5c489 100644 --- a/lib/compress/zstd_double_fast.h +++ b/lib/compress/zstd_double_fast.h @@ -18,6 +18,8 @@ extern "C" { #include "../common/mem.h" /* U32 */ #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ +#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, void const* end, ZSTD_dictTableLoadMethod_e dtlm, ZSTD_tableFillPurpose_e tfp); @@ -31,6 +33,7 @@ size_t 
ZSTD_compressBlock_doubleFast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ #if defined (__cplusplus) } diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 5ba88e8678f..7a0affbd062 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -12,6 +12,11 @@ #include "zstd_lazy.h" #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */ +#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) + #define kLazySkippingStep 8 @@ -1754,151 +1759,194 @@ ZSTD_compressBlock_lazy_generic( /* Return the last literals size */ return (size_t)(iend - anchor); } +#endif /* build exclusions */ +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btlazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return 
ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); } +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); } +#endif - +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, 
srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); } +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); } +#endif /* Row-based matchfinder */ +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); } +#endif +#ifndef 
ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); } +#endif - +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); } +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); } +#endif +#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, @@ -2101,15 +2149,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* Return the last literals size */ return (size_t)(iend - anchor); } +#endif /* build exclusions */ - +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t 
srcSize) { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -2117,7 +2168,9 @@ size_t ZSTD_compressBlock_lazy_extDict( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -2125,7 +2178,9 @@ size_t ZSTD_compressBlock_lazy2_extDict( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); } +#endif +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -2133,14 +2188,18 @@ size_t ZSTD_compressBlock_btlazy2_extDict( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -2148,10 +2207,13 @@ size_t ZSTD_compressBlock_lazy_extDict_row( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); } +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_extDict_row( 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); } +#endif diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index 3bde67331e4..7e11d0cf126 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -27,97 +27,156 @@ extern "C" { #define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ +#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). 
preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ +#endif +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btlazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t 
srcSize); +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif #if defined (__cplusplus) diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index ec0690ccbf5..8581e2356fb 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -246,7 +246,9 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, break; case ZSTD_dfast: +#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); +#endif break; case ZSTD_greedy: diff --git a/lib/compress/zstd_opt.c 
b/lib/compress/zstd_opt.c index f02a760946e..4b0fbfafe0a 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -12,6 +12,10 @@ #include "hist.h" #include "zstd_opt.h" +#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ #define ZSTD_MAX_PRICE (1<<30) @@ -1360,7 +1364,9 @@ static size_t ZSTD_compressBlock_opt2( { return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); } +#endif /* build exclusions */ +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btopt( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) @@ -1368,10 +1374,13 @@ size_t ZSTD_compressBlock_btopt( DEBUGLOG(5, "ZSTD_compressBlock_btopt"); return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } +#endif +#if !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) /* ZSTD_initStats_ultra(): * make a first compression pass, just to seed stats with more accurate starting values. * only works on first block, with no dictionary and no ldm. 
@@ -1402,7 +1411,9 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ms->nextToUpdate = ms->window.dictLimit; } +#endif /* build exclusions */ +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) @@ -1410,7 +1421,9 @@ size_t ZSTD_compressBlock_btultra( DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) @@ -1438,34 +1451,43 @@ size_t ZSTD_compressBlock_btultra2( return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } +#endif +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btopt_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); } +#endif +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); } +#endif +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btopt_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); } +#endif +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, 
ZSTD_extDict); } +#endif /* note : no btultra2 variant for extDict nor dictMatchState, * because btultra2 is not meant to work with dictionaries diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 342e5a31127..141981da0ab 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -17,33 +17,52 @@ extern "C" { #include "zstd_compress_internal.h" +#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) /* used in ZSTD_loadDictionaryContent() */ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); +#endif +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btopt( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btopt_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btopt_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra_extDict( 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#endif /* note : no btultra2 variant for extDict nor dictMatchState, * because btultra2 is not meant to work with dictionaries From 5a75956001efbde704eedad755daae2816273a74 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 19 Apr 2023 17:53:40 -0400 Subject: [PATCH 023/283] Adjust Strategy in CParams to Avoid Using Excluded Block Compressors --- lib/compress/zstd_compress.c | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 52d1c1963b8..cbe10aa9897 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1467,6 +1467,50 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); assert(ZSTD_checkCParams(cPar)==0); + /* Cascade the selected strategy down to the next-highest one built into + * this binary. 
*/ +#ifdef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_btultra2) { + cPar.strategy = ZSTD_btultra; + } +#endif +#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_btultra) { + cPar.strategy = ZSTD_btopt; + } +#endif +#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_btopt) { + cPar.strategy = ZSTD_btlazy2; + } +#endif +#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_btlazy2) { + cPar.strategy = ZSTD_lazy2; + } +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_lazy2) { + cPar.strategy = ZSTD_lazy; + } +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_lazy) { + cPar.strategy = ZSTD_greedy; + } +#endif +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_greedy) { + cPar.strategy = ZSTD_dfast; + } +#endif +#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_dfast) { + cPar.strategy = ZSTD_fast; + cPar.targetLength = 0; + } +#endif + switch (mode) { case ZSTD_cpm_unknown: case ZSTD_cpm_noAttachDict: From 16bbd7437cf67a748ac22349a3ff974a518a3d66 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 20 Apr 2023 11:46:04 -0400 Subject: [PATCH 024/283] Avoid Ratio Regression Tests When Compressors are Excluded --- tests/fuzzer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index b049a9e09ba..c17cdcd4a93 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2426,6 +2426,15 @@ static int basicUnitTests(U32 const seed, double compressibility) } } DISPLAYLEVEL(3, "OK \n"); +#if !defined(ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR) \ + && !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + && !defined(ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR) \ + && !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + && !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ + && !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ + && !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ + && !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ + && !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) /* Note : these tests should be replaced by proper regression tests, * but existing ones do not focus on small data + dictionary + all levels. */ @@ -2524,6 +2533,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(4, "compression efficiency tests OK \n"); } +#endif ZSTD_freeCCtx(ctxOrig); ZSTD_freeCCtx(ctxDuplicated); From 6761e1c949b99050f79a90a333c3432ba7cf3f22 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 20 Apr 2023 11:46:29 -0400 Subject: [PATCH 025/283] Tweak Ultra/Opt Guards --- lib/compress/zstd_opt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 4b0fbfafe0a..2856398cf8a 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1350,21 +1350,26 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* Return the last literals size */ return (size_t)(iend - anchor); } +#endif /* build exclusions */ +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR static size_t ZSTD_compressBlock_opt0( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) { return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); } +#endif +#if !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) static size_t ZSTD_compressBlock_opt2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) { return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); } -#endif /* build exclusions */ +#endif #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btopt( @@ -1379,8 +1384,7 @@ size_t ZSTD_compressBlock_btopt( -#if !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ - || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) +#ifndef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR /* ZSTD_initStats_ultra(): * make a first compression pass, just to seed stats with more accurate starting values. * only works on first block, with no dictionary and no ldm. From b12e8cb3e73c2eb0d176eb9b4dd0ff943b766242 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 20 Apr 2023 11:53:23 -0400 Subject: [PATCH 026/283] Merge Ultra and Ultra2 Exclusion Ultra2 does not exist for dict compression, and so uses ultra. So ultra must be present if ultra2 is. --- lib/compress/zstd_compress.c | 12 +++--------- lib/compress/zstd_opt.c | 10 +++------- lib/compress/zstd_opt.h | 5 +---- tests/fuzzer.c | 3 +-- 4 files changed, 8 insertions(+), 22 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index cbe10aa9897..c511519801e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1469,12 +1469,10 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, /* Cascade the selected strategy down to the next-highest one built into * this binary. */ -#ifdef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR +#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR if (cPar.strategy == ZSTD_btultra2) { cPar.strategy = ZSTD_btultra; } -#endif -#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR if (cPar.strategy == ZSTD_btultra) { cPar.strategy = ZSTD_btopt; } @@ -3068,12 +3066,9 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS #endif #ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR NULL, -#else - ZSTD_compressBlock_btultra, -#endif -#ifdef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR NULL #else + ZSTD_compressBlock_btultra, ZSTD_compressBlock_btultra2 #endif }, @@ -5006,8 +5001,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_btultra2: #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ - || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ - || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) assert(srcSize >= HASH_READ_SIZE); ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); #endif diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 2856398cf8a..edcd65e21ca 100644 --- a/lib/compress/zstd_opt.c +++ 
b/lib/compress/zstd_opt.c @@ -14,8 +14,7 @@ #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ - || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ - || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ #define ZSTD_MAX_PRICE (1<<30) @@ -1361,8 +1360,7 @@ static size_t ZSTD_compressBlock_opt0( } #endif -#if !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ - || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR static size_t ZSTD_compressBlock_opt2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) @@ -1384,7 +1382,7 @@ size_t ZSTD_compressBlock_btopt( -#ifndef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR /* ZSTD_initStats_ultra(): * make a first compression pass, just to seed stats with more accurate starting values. * only works on first block, with no dictionary and no ldm. 
@@ -1425,9 +1423,7 @@ size_t ZSTD_compressBlock_btultra( DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } -#endif -#ifndef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 141981da0ab..e503df32b94 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -19,8 +19,7 @@ extern "C" { #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ - || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ - || !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) /* used in ZSTD_loadDictionaryContent() */ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); #endif @@ -34,8 +33,6 @@ size_t ZSTD_compressBlock_btopt( size_t ZSTD_compressBlock_btultra( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#endif -#ifndef ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index c17cdcd4a93..4a68ff73168 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2433,8 +2433,7 @@ static int basicUnitTests(U32 const seed, double compressibility) && !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ && !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ && !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ - && !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) \ - && !defined(ZSTD_EXCLUDE_BTULTRA2_BLOCK_COMPRESSOR) + && !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) /* Note : these tests should be replaced by proper regression tests, * but existing ones do not focus 
on small data + dictionary + all levels. */ From 39b7946b95dc4359d7a9546ede906489682dd0d9 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 21 Apr 2023 11:32:41 -0400 Subject: [PATCH 027/283] Define Macros for Possibly-Present Functions; Use Them Rather than Ifdef Guards --- lib/compress/zstd_compress.c | 225 ++++++-------------------------- lib/compress/zstd_double_fast.h | 8 ++ lib/compress/zstd_lazy.h | 81 ++++++++++++ lib/compress/zstd_opt.h | 21 +++ 4 files changed, 149 insertions(+), 186 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c511519801e..541b776f1d1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3034,142 +3034,43 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { { ZSTD_compressBlock_fast /* default for 0 */, ZSTD_compressBlock_fast, -#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_doubleFast, -#endif -#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_greedy, -#endif -#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy, -#endif -#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy2, -#endif -#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_btlazy2, -#endif -#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_btopt, -#endif -#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR - NULL, - NULL -#else - ZSTD_compressBlock_btultra, - ZSTD_compressBlock_btultra2 -#endif + ZSTD_COMPRESSBLOCK_DOUBLEFAST, + ZSTD_COMPRESSBLOCK_GREEDY, + ZSTD_COMPRESSBLOCK_LAZY, + ZSTD_COMPRESSBLOCK_LAZY2, + ZSTD_COMPRESSBLOCK_BTLAZY2, + ZSTD_COMPRESSBLOCK_BTOPT, + ZSTD_COMPRESSBLOCK_BTULTRA, + ZSTD_COMPRESSBLOCK_BTULTRA2 }, { ZSTD_compressBlock_fast_extDict /* default for 0 */, ZSTD_compressBlock_fast_extDict, 
-#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_doubleFast_extDict, -#endif -#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_greedy_extDict, -#endif -#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy_extDict, -#endif -#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy2_extDict, -#endif -#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_btlazy2_extDict, -#endif -#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_btopt_extDict, -#endif -#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR - NULL, - NULL -#else - ZSTD_compressBlock_btultra_extDict, - ZSTD_compressBlock_btultra_extDict -#endif + ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT, + ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT, + ZSTD_COMPRESSBLOCK_LAZY_EXTDICT, + ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT, + ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT, + ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT, + ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT, + ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT }, { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, ZSTD_compressBlock_fast_dictMatchState, -#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_doubleFast_dictMatchState, -#endif -#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_greedy_dictMatchState, -#endif -#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy_dictMatchState, -#endif -#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy2_dictMatchState, -#endif -#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_btlazy2_dictMatchState, -#endif -#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_btopt_dictMatchState, -#endif -#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR - NULL, - NULL -#else - ZSTD_compressBlock_btultra_dictMatchState, - 
ZSTD_compressBlock_btultra_dictMatchState -#endif + ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE }, { NULL /* default for 0 */, NULL, NULL, -#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_greedy_dedicatedDictSearch, -#endif -#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy_dedicatedDictSearch, -#endif -#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy2_dedicatedDictSearch, -#endif + ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH, + ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH, + ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH, NULL, NULL, NULL, @@ -3183,72 +3084,24 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { { -#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_greedy_row, -#endif -#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy_row, -#endif -#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy2_row -#endif + ZSTD_COMPRESSBLOCK_GREEDY_ROW, + ZSTD_COMPRESSBLOCK_LAZY_ROW, + ZSTD_COMPRESSBLOCK_LAZY2_ROW }, { -#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_greedy_extDict_row, -#endif -#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy_extDict_row, -#endif -#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy2_extDict_row -#endif + ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW, + ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW, + 
ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW }, { -#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_greedy_dictMatchState_row, -#endif -#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy_dictMatchState_row, -#endif -#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy2_dictMatchState_row -#endif + ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW, + ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW, + ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW }, { -#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_greedy_dedicatedDictSearch_row, -#endif -#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy_dedicatedDictSearch_row, -#endif -#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR - NULL, -#else - ZSTD_compressBlock_lazy2_dedicatedDictSearch_row -#endif + ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW, + ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW, + ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW } }; DEBUGLOG(4, "Selecting a row-based matchfinder"); diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h index f1a86d5c489..ce6ed8c97fd 100644 --- a/lib/compress/zstd_double_fast.h +++ b/lib/compress/zstd_double_fast.h @@ -23,6 +23,7 @@ extern "C" { void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, void const* end, ZSTD_dictTableLoadMethod_e dtlm, ZSTD_tableFillPurpose_e tfp); + size_t ZSTD_compressBlock_doubleFast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); @@ -33,6 +34,13 @@ size_t ZSTD_compressBlock_doubleFast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT 
ZSTD_compressBlock_doubleFast_extDict +#else +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL #endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ #if defined (__cplusplus) diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index 7e11d0cf126..da320030f02 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -43,139 +43,220 @@ void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const redu size_t ZSTD_compressBlock_btlazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2 +#else +#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL #endif #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2 +#else +#define ZSTD_COMPRESSBLOCK_LAZY2 NULL #endif #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy +#else +#define ZSTD_COMPRESSBLOCK_LAZY NULL #endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy +#else +#define ZSTD_COMPRESSBLOCK_GREEDY NULL #endif #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL #endif #ifndef 
ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL #endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row +#else +#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL #endif #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState +#else +#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL #endif #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState +#else +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL #endif #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState +#else +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL #endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState +#else +#define 
ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL #endif #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL #endif #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL #endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row +#else +#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL #endif #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch +#else +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL #endif #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch +#else +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL #endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch +#else +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL #endif #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL #endif #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL #endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row +#else +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL #endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict +#else +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL #endif #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT 
ZSTD_compressBlock_lazy_extDict +#else +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL #endif #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict +#else +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL #endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row +#else +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL #endif #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL #endif #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL #endif #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict +#else +#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL #endif diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index e503df32b94..694e317657d 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -28,14 +28,23 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const 
BYTE* ip, const BYTE* iend); size_t ZSTD_compressBlock_btopt( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt +#else +#define ZSTD_COMPRESSBLOCK_BTOPT NULL #endif + #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra size_t ZSTD_compressBlock_btultra2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2 +#else +#define ZSTD_COMPRESSBLOCK_BTULTRA NULL +#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL #endif @@ -43,22 +52,34 @@ size_t ZSTD_compressBlock_btultra2( size_t ZSTD_compressBlock_btopt_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState +#else +#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL #endif #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState +#else +#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL #endif #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btopt_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict +#else +#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL #endif #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra_extDict( ZSTD_matchState_t* ms, seqStore_t* 
seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict +#else +#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL #endif /* note : no btultra2 variant for extDict nor dictMatchState, From bae174960b4abd8cefadf23f323b2c82829538e6 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 21 Apr 2023 16:14:11 -0400 Subject: [PATCH 028/283] Add ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP Build Variable --- lib/libzstd.mk | 6 ++++++ tests/playTests.sh | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/libzstd.mk b/lib/libzstd.mk index 5e11d5d294e..0b1e03907c1 100644 --- a/lib/libzstd.mk +++ b/lib/libzstd.mk @@ -47,6 +47,8 @@ endif # Assembly support ZSTD_NO_ASM ?= 0 +ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP ?= 0 + ################################################################## # libzstd helpers ################################################################## @@ -178,6 +180,10 @@ ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0) CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API endif +ifneq ($(ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP), 0) + CFLAGS += -DZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR +endif + ifneq ($(ZSTD_LEGACY_SUPPORT), 0) ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0) ZSTD_LEGACY_FILES += $(shell ls $(LIBZSTD)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') diff --git a/tests/playTests.sh b/tests/playTests.sh index 5f595f61154..718398f8230 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -1708,8 +1708,14 @@ zstd --patch-from=tmp_dict -r tmp_dir && die rm -rf tmp* println "\n===> patch-from long mode trigger larger file test" -datagen -g5000000 > tmp_dict -datagen -g5000000 > tmp_patch +if [ 
"$ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP" -eq "1" ]; then + # if binary tree strategies are excluded, the threshold is different + datagen -g10000000 > tmp_dict + datagen -g10000000 > tmp_patch +else + datagen -g5000000 > tmp_dict + datagen -g5000000 > tmp_patch +fi zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | grep "long mode automatically triggered" rm -rf tmp* From 698af84fcf8bdcfa3db4936a88e84c354331a84a Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 21 Apr 2023 16:14:27 -0400 Subject: [PATCH 029/283] Add CI Test for Excluding Matchfinders --- .github/workflows/dev-short-tests.yml | 1 + tests/playTests.sh | 34 ++++++++++++++------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml index d6f2db88641..566c84d0171 100644 --- a/.github/workflows/dev-short-tests.yml +++ b/.github/workflows/dev-short-tests.yml @@ -348,6 +348,7 @@ jobs: make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG" make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS" make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS" + make clean && make check ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1 MOREFLAGS="-Werror" dynamic-bmi2: runs-on: ubuntu-latest diff --git a/tests/playTests.sh b/tests/playTests.sh index 718398f8230..f51776170bd 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -1602,22 +1602,24 @@ roundTripTest -g1M -P50 "1 --single-thread --long=29" " --long=28 --memory=512MB roundTripTest -g1M -P50 "1 --single-thread --long=29" " --zstd=wlog=28 --memory=512MB" -println "\n===> zstd long distance matching with optimal parser compressed size tests " -optCSize16=$(datagen -g511K | zstd -16 -c | wc -c) -longCSize16=$(datagen -g511K | zstd -16 --long -c | wc -c) -optCSize19=$(datagen -g2M | zstd -19 -c | wc -c) -longCSize19=$(datagen -g2M | zstd 
-19 --long -c | wc -c) -optCSize19wlog23=$(datagen -g2M | zstd -19 -c --zstd=wlog=23 | wc -c) -longCSize19wlog23=$(datagen -g2M | zstd -19 -c --long=23 | wc -c) -if [ "$longCSize16" -gt "$optCSize16" ]; then - echo using --long on compression level 16 should not cause compressed size regression - exit 1 -elif [ "$longCSize19" -gt "$optCSize19" ]; then - echo using --long on compression level 19 should not cause compressed size regression - exit 1 -elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then - echo using --long on compression level 19 with wLog=23 should not cause compressed size regression - exit 1 +if [ "$ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP" -ne "1" ]; then + println "\n===> zstd long distance matching with optimal parser compressed size tests " + optCSize16=$(datagen -g511K | zstd -16 -c | wc -c) + longCSize16=$(datagen -g511K | zstd -16 --long -c | wc -c) + optCSize19=$(datagen -g2M | zstd -19 -c | wc -c) + longCSize19=$(datagen -g2M | zstd -19 --long -c | wc -c) + optCSize19wlog23=$(datagen -g2M | zstd -19 -c --zstd=wlog=23 | wc -c) + longCSize19wlog23=$(datagen -g2M | zstd -19 -c --long=23 | wc -c) + if [ "$longCSize16" -gt "$optCSize16" ]; then + echo using --long on compression level 16 should not cause compressed size regression + exit 1 + elif [ "$longCSize19" -gt "$optCSize19" ]; then + echo using --long on compression level 19 should not cause compressed size regression + exit 1 + elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then + echo using --long on compression level 19 with wLog=23 should not cause compressed size regression + exit 1 + fi fi println "\n===> zstd asyncio tests " From f242f5be8f0d57fb9b49f22f35032953072471cc Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 24 Apr 2023 10:40:52 -0400 Subject: [PATCH 030/283] Re-Order Lazy Declarations; Minimize ifndefs --- lib/compress/zstd_lazy.h | 233 ++++++++++++++------------------------- 1 file changed, 84 insertions(+), 149 deletions(-) diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index da320030f02..3635813bddf 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -39,223 +39,158 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ #endif -#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2 -#else -#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2 -#else -#define ZSTD_COMPRESSBLOCK_LAZY2 NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy -#else -#define ZSTD_COMPRESSBLOCK_LAZY NULL -#endif #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy -#else -#define ZSTD_COMPRESSBLOCK_GREEDY NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_row( +size_t 
ZSTD_compressBlock_greedy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row -#else -#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_row( +size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row -#else -#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_row( +size_t ZSTD_compressBlock_greedy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row -#else -#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL -#endif - -#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btlazy2_dictMatchState( +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState -#else -#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_dictMatchState( +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState -#else -#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_dictMatchState( +size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 
rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState -#else -#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_dictMatchState( +size_t ZSTD_compressBlock_greedy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy +#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row #define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState +#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row #else +#define ZSTD_COMPRESSBLOCK_GREEDY NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL #define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL #endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row -#else -#define 
ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_dictMatchState_row( +size_t ZSTD_compressBlock_lazy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row -#else -#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_dictMatchState_row( +size_t ZSTD_compressBlock_lazy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row -#else -#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL -#endif - -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( +size_t ZSTD_compressBlock_lazy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch -#else -#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch -#else -#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH 
ZSTD_compressBlock_greedy_dedicatedDictSearch -#else -#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( +size_t ZSTD_compressBlock_lazy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row -#else -#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( +size_t ZSTD_compressBlock_lazy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy +#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch #define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row #else +#define ZSTD_COMPRESSBLOCK_LAZY NULL +#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL #define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL #endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + +#ifndef 
ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row -#else -#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL -#endif - -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_extDict( +size_t ZSTD_compressBlock_lazy2_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict -#else -#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_extDict( +size_t ZSTD_compressBlock_lazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict -#else -#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_extDict( +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict -#else -#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_extDict_row( +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row -#else -#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_extDict_row( +size_t 
ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row -#else -#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2 +#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict #define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row #else +#define ZSTD_COMPRESSBLOCK_LAZY2 NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL #define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL #endif + #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2 +#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState #define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict #else +#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL +#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL #define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL #endif From b7add1dd67f24124f2ebc722effb322fb77ee92b Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 24 Apr 2023 11:50:01 -0400 Subject: [PATCH 031/283] Abort if Unsupported Parameters Used --- lib/compress/zstd_compress.c | 6 ++++++ lib/compress/zstd_ldm.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 541b776f1d1..09f0482394b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4820,6 +4820,8 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_dfast: #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp); +#else + assert(0); /* shouldn't be called: cparams should've been adjusted. */ #endif break; @@ -4845,6 +4847,8 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, DEBUGLOG(4, "Using chain-based hash table for lazy dict"); } } +#else + assert(0); /* shouldn't be called: cparams should've been adjusted. */ #endif break; @@ -4857,6 +4861,8 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) assert(srcSize >= HASH_READ_SIZE); ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); +#else + assert(0); /* shouldn't be called: cparams should've been adjusted. 
*/ #endif break; diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 8581e2356fb..eda45816e99 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -248,6 +248,8 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, case ZSTD_dfast: #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); +#else + assert(0); /* shouldn't be called: cparams should've been adjusted. */ #endif break; From d09f195ceb774bc0b3b7c764ddb907bc3de8c69e Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 24 Apr 2023 14:50:54 -0400 Subject: [PATCH 032/283] Remove blockCompressor NULL Checks --- lib/compress/zstd_compress.c | 2 -- lib/compress/zstd_ldm.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 09f0482394b..521e5e5d681 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3344,7 +3344,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) "External sequence producer returned error code %lu. 
Falling back to internal parser.", (unsigned long)nbExternalSeqs ); - RETURN_ERROR_IF(blockCompressor == NULL, parameter_combination_unsupported, "Got NULL block compressor!"); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } } } else { /* not long range mode and no external matchfinder */ @@ -3353,7 +3352,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, dictMode); ms->ldmSeqStore = NULL; - RETURN_ERROR_IF(blockCompressor == NULL, parameter_combination_unsupported, "Got NULL block compressor!"); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index eda45816e99..01c1f75aae4 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -676,8 +676,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, /* Input positions */ BYTE const* ip = istart; - RETURN_ERROR_IF(blockCompressor == NULL, parameter_combination_unsupported, "Got NULL block compressor!"); - DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); /* If using opt parser, use LDMs only as candidates rather than always accepting them */ if (cParams->strategy >= ZSTD_btopt) { From eb9227935ead3eff349dcdde296543ff097deae0 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 24 Apr 2023 16:53:30 -0400 Subject: [PATCH 033/283] Also Reorganize Zstd Opt Declarations --- lib/compress/zstd_opt.h | 59 ++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 694e317657d..d4e71131572 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -28,63 +28,50 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); size_t ZSTD_compressBlock_btopt( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt -#else -#define ZSTD_COMPRESSBLOCK_BTOPT NULL -#endif - -#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btultra( +size_t ZSTD_compressBlock_btopt_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra -size_t ZSTD_compressBlock_btultra2( +size_t ZSTD_compressBlock_btopt_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2 -#else -#define ZSTD_COMPRESSBLOCK_BTULTRA NULL -#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL -#endif - -#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); +#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt #define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState +#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict #else +#define ZSTD_COMPRESSBLOCK_BTOPT NULL #define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL #endif + #ifndef 
ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btultra_dictMatchState( +size_t ZSTD_compressBlock_btultra( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState -#else -#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL -#endif - -#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btopt_extDict( +size_t ZSTD_compressBlock_btultra_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict -#else -#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL -#endif -#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict -#else -#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL -#endif /* note : no btultra2 variant for extDict nor dictMatchState, * because btultra2 is not meant to work with dictionaries * and is only specific for the first block (no prefix) */ +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra +#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState +#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict +#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2 +#else +#define ZSTD_COMPRESSBLOCK_BTULTRA NULL +#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL +#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL +#endif #if defined (__cplusplus) } From 
cc1ffe0bd6561128f39cc6c673aa75c91a925b68 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 4 May 2023 12:20:02 -0400 Subject: [PATCH 034/283] Add Documentation to lib/README.md --- lib/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/README.md b/lib/README.md index c3b5d1817f3..2da16647a7f 100644 --- a/lib/README.md +++ b/lib/README.md @@ -119,6 +119,12 @@ The file structure is designed to make this selection manually achievable for an binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`). + On the compressor side, Zstd's compression levels map to several internal + strategies. In environments where the higher compression levels aren't used, + it is possible to exclude all but the fastest strategy with + `ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1`. (Note that this will change + the behavior of the default compression level.) + For squeezing the last ounce of size out, you can also define `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`, which removes the error messages that are otherwise returned by From 5490c75ddaae98010985618832ab55ed7b98dbed Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 4 May 2023 12:31:41 -0400 Subject: [PATCH 035/283] Also Allow/Document/Test Excluding dfast and Up --- .github/workflows/dev-short-tests.yml | 1 + lib/README.md | 5 ++++- lib/libzstd.mk | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml index 566c84d0171..9474190ffde 100644 --- a/.github/workflows/dev-short-tests.yml +++ b/.github/workflows/dev-short-tests.yml @@ -349,6 +349,7 @@ jobs: make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS" make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS" make clean && make check ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1 MOREFLAGS="-Werror" + make clean && make check ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP=1 MOREFLAGS="-Werror" dynamic-bmi2: runs-on: ubuntu-latest diff --git a/lib/README.md b/lib/README.md index 2da16647a7f..47982093382 100644 --- a/lib/README.md +++ b/lib/README.md @@ -123,7 +123,10 @@ The file structure is designed to make this selection manually achievable for an strategies. In environments where the higher compression levels aren't used, it is possible to exclude all but the fastest strategy with `ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1`. (Note that this will change - the behavior of the default compression level.) + the behavior of the default compression level.) Or if you want to retain the + default compressor as well, you can set + `ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP=1`, at the cost of an additional + ~20KB or so. 
For squeezing the last ounce of size out, you can also define `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`, diff --git a/lib/libzstd.mk b/lib/libzstd.mk index 0b1e03907c1..2c47ecdfa06 100644 --- a/lib/libzstd.mk +++ b/lib/libzstd.mk @@ -48,6 +48,7 @@ endif ZSTD_NO_ASM ?= 0 ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP ?= 0 +ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP ?= 0 ################################################################## # libzstd helpers @@ -182,6 +183,10 @@ endif ifneq ($(ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP), 0) CFLAGS += -DZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR +else +ifneq ($(ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP), 0) + CFLAGS += -DZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR +endif endif ifneq ($(ZSTD_LEGACY_SUPPORT), 0) From 59c7b2a49247de8d2335e3a492135e9396ce8e84 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 22 May 2023 12:37:03 -0400 Subject: [PATCH 036/283] Reorder Definitions in zstd_lazy.c to Group Under Macro Guards --- lib/compress/zstd_lazy.c | 155 +++++++++++++++------------------------ 1 file changed, 58 insertions(+), 97 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 7a0affbd062..834520fd763 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1762,161 +1762,130 @@ ZSTD_compressBlock_lazy_generic( #endif /* build exclusions */ -#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btlazy2( +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2( +size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy( +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); } -#endif -#ifndef 
ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy( +size_t ZSTD_compressBlock_greedy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); } -#endif -#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btlazy2_dictMatchState( +size_t ZSTD_compressBlock_greedy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_dictMatchState( +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); } #endif #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_dictMatchState( +size_t ZSTD_compressBlock_lazy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); } -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_dictMatchState( 
+size_t ZSTD_compressBlock_lazy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( +size_t ZSTD_compressBlock_lazy_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); } -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( +size_t ZSTD_compressBlock_lazy_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); } -#endif -/* Row-based matchfinder */ -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_row( 
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); } #endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_row( +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); } -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_row( +size_t ZSTD_compressBlock_lazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_dictMatchState_row( +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_dictMatchState_row( +size_t ZSTD_compressBlock_lazy2_row( 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); } -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_dictMatchState_row( +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -1925,21 +1894,19 @@ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( } #endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btlazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); } -#endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( +size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 
0, ZSTD_dedicatedDictSearch); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); } #endif @@ -2158,62 +2125,56 @@ size_t ZSTD_compressBlock_greedy_extDict( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_extDict( +size_t ZSTD_compressBlock_greedy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) - { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); } #endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_extDict( +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); } -#endif -#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btlazy2_extDict( +size_t ZSTD_compressBlock_lazy_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); } #endif -#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_greedy_extDict_row( +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* 
src, size_t srcSize) + { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); } -#endif -#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy_extDict_row( +size_t ZSTD_compressBlock_lazy2_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) - { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); } #endif -#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_lazy2_extDict_row( +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) + { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } #endif From 1b65803fe7f506f5551d3946dc74f9fef8b87f71 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 22 May 2023 12:41:48 -0400 Subject: [PATCH 037/283] Reorder Definitions in zstd_opt.c to Group Under Macro Guards (Slightly) --- lib/compress/zstd_opt.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index edcd65e21ca..3d54e21aef8 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1413,9 +1413,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ms->nextToUpdate = ms->window.dictLimit; } -#endif /* build exclusions */ -#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) @@ -1460,27 +1458,23 @@ size_t ZSTD_compressBlock_btopt_dictMatchState( { return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); } -#endif -#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btultra_dictMatchState( +size_t ZSTD_compressBlock_btopt_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); } #endif -#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -size_t ZSTD_compressBlock_btopt_extDict( +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btultra_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); } -#endif -#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR size_t ZSTD_compressBlock_btultra_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) From 
1b994cbc57869cc73e6434acb639aab648fcc678 Mon Sep 17 00:00:00 2001 From: Duncan Horn Date: Mon, 1 May 2023 14:29:52 -0700 Subject: [PATCH 038/283] Get zstd working with ARM64EC on Windows --- lib/common/compiler.h | 2 +- lib/common/xxhash.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 73f8d01998b..79e773c0fbe 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -112,7 +112,7 @@ # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ #else -# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h index b8b73290bbc..69572117a69 100644 --- a/lib/common/xxhash.h +++ b/lib/common/xxhash.h @@ -3166,7 +3166,7 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) #if defined(XXH_NO_PREFETCH) # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ #else -# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */ +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) && !defined(_M_ARM64EC) /* _mm_prefetch() not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) From d01a2c69296cff9bd052b797b2be1055a96cd644 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 25 
May 2023 14:35:49 -0700 Subject: [PATCH 039/283] Fix UBSAN issue (zero addition to NULL) Fix UBSAN issue that came up internally. --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 521e5e5d681..19446b63ff3 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -5951,7 +5951,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) { assert(input->pos >= zcs->stableIn_notConsumed); input->pos -= zcs->stableIn_notConsumed; - ip -= zcs->stableIn_notConsumed; + if (ip) ip -= zcs->stableIn_notConsumed; zcs->stableIn_notConsumed = 0; } if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { From 5059618295bc67f4f70eb6f12e6cf57b8d3de141 Mon Sep 17 00:00:00 2001 From: Tao He Date: Thu, 25 May 2023 22:48:01 +0800 Subject: [PATCH 040/283] Add options in Makefile to cmake Signed-off-by: Tao He --- build/cmake/lib/CMakeLists.txt | 64 +++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 30349586ba9..457b547978a 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -12,6 +12,22 @@ project(libzstd C ASM) set(CMAKE_INCLUDE_CURRENT_DIR TRUE) option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON) option(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" ON) +option(ZSTD_BUILD_COMPRESSION "BUILD COMPRESSION MODULE" ON) +option(ZSTD_BUILD_DECOMPRESSION "BUILD DECOMPRESSION MODUEL" ON) +option(ZSTD_BUILD_DICTBUILDER "BUILD DICTBUILDER MODULE" ON) +option(ZSTD_BUILD_DEPRECATED "BUILD DEPRECATED MODULE" OFF) + +set(ZSTDLIB_VISIBLE "" CACHE STRING "Visiblity for ZSTDLIB API") +set(ZSTDERRORLIB_VISIBLE "" CACHE STRING "Visiblity for ZSTDERRORLIB_VISIBLE API") +set(ZDICTLIB_VISIBLE "" CACHE STRING "Visiblity for ZDICTLIB_VISIBLE API") 
+set(ZSTDLIB_STATIC_API "" CACHE STRING "Visiblity for ZSTDLIB_STATIC_API API") +set(ZDICTLIB_STATIC_API "" CACHE STRING "Visiblity for ZDICTLIB_STATIC_API API") + +set_property(CACHE ZSTDLIB_VISIBLE PROPERTY STRINGS "" "hidden" "default" "protected" "internal") +set_property(CACHE ZSTDERRORLIB_VISIBLE PROPERTY STRINGS "" "hidden" "default" "protected" "internal") +set_property(CACHE ZDICTLIB_VISIBLE PROPERTY STRINGS "" "hidden" "default" "protected" "internal") +set_property(CACHE ZSTDLIB_STATIC_API PROPERTY STRINGS "" "hidden" "default" "protected" "internal") +set_property(CACHE ZDICTLIB_STATIC_API PROPERTY STRINGS "" "hidden" "default" "protected" "internal") if(NOT ZSTD_BUILD_SHARED AND NOT ZSTD_BUILD_STATIC) message(SEND_ERROR "You need to build at least one flavor of libzstd") @@ -29,24 +45,32 @@ else () file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c ${LIBRARY_DIR}/decompress/*.S) endif () file(GLOB DictBuilderSources ${LIBRARY_DIR}/dictBuilder/*.c) - -set(Sources - ${CommonSources} - ${CompressSources} - ${DecompressSources} - ${DictBuilderSources}) +file(GLOB DeprecatedSources ${LIBRARY_DIR}/deprecated/*.c) file(GLOB CommonHeaders ${LIBRARY_DIR}/common/*.h) file(GLOB CompressHeaders ${LIBRARY_DIR}/compress/*.h) file(GLOB DecompressHeaders ${LIBRARY_DIR}/decompress/*.h) file(GLOB DictBuilderHeaders ${LIBRARY_DIR}/dictBuilder/*.h) +file(GLOB DeprecatedHeaders ${LIBRARY_DIR}/deprecated/*.h) -set(Headers - ${LIBRARY_DIR}/zstd.h - ${CommonHeaders} - ${CompressHeaders} - ${DecompressHeaders} - ${DictBuilderHeaders}) +set(Sources ${CommonSources}) +set(Headers ${LIBRARY_DIR}/zstd.h ${CommonHeaders}) +if (ZSTD_BUILD_COMPRESSION) + set(Sources ${Sources} ${CompressSources}) + set(Headers ${Headers} ${CompressHeaders}) +endif() +if (ZSTD_BUILD_DECOMPRESSION) + set(Sources ${Sources} ${DecompressSources}) + set(Headers ${Headers} ${DecompressHeaders}) +endif() +if (ZSTD_BUILD_DICTBUILDER) + set(Sources ${Sources} ${DictBuilderSources}) + set(Headers 
${Headers} ${DictBuilderHeaders}) +endif() +if (ZSTD_BUILD_DEPRECATED) + set(Sources ${Sources} ${DeprecatedSources}) + set(Headers ${Headers} ${DeprecatedHeaders}) +endif() if (ZSTD_LEGACY_SUPPORT) set(LIBRARY_LEGACY_DIR ${LIBRARY_DIR}/legacy) @@ -83,6 +107,12 @@ endif () # macros. set_source_files_properties(${Sources} PROPERTIES LANGUAGE C) +macro (add_definition target var) + if (NOT ("${${var}}" STREQUAL "")) + set_property(TARGET ${target} APPEND PROPERTY COMPILE_DEFINITIONS "${var}=__attribute__((visibility(\"${${var}}\")))") + endif () +endmacro () + # Split project to static and shared libraries build set(library_targets) if (ZSTD_BUILD_SHARED) @@ -93,7 +123,10 @@ if (ZSTD_BUILD_SHARED) if (UNIX) target_link_libraries(libzstd_shared ${THREADS_LIBS}) endif () - endif() + endif () + add_definition(libzstd_shared ZSTDLIB_VISIBLE) + add_definition(libzstd_shared ZSTDERRORLIB_VISIBLE) + add_definition(libzstd_shared ZDICTLIB_VISIBLE) endif () if (ZSTD_BUILD_STATIC) add_library(libzstd_static STATIC ${Sources} ${Headers}) @@ -104,6 +137,11 @@ if (ZSTD_BUILD_STATIC) target_link_libraries(libzstd_static ${THREADS_LIBS}) endif () endif () + add_definition(libzstd_static ZSTDLIB_VISIBLE) + add_definition(libzstd_static ZSTDERRORLIB_VISIBLE) + add_definition(libzstd_static ZDICTLIB_VISIBLE) + add_definition(libzstd_static ZSTDLIB_STATIC_API) + add_definition(libzstd_static ZDICTLIB_STATIC_API) endif () # Add specific compile definitions for MSVC project From 5108c9ac975b5e4ff62418584cc6c8934d747b38 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 27 May 2023 11:22:30 -0700 Subject: [PATCH 041/283] Fixed a bug in the educational decoder Credit to Igor Pavlov --- doc/educational_decoder/zstd_decompress.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c index 3196b78dc38..1da7c528d23 100644 --- a/doc/educational_decoder/zstd_decompress.c +++ 
b/doc/educational_decoder/zstd_decompress.c @@ -997,7 +997,8 @@ static void decompress_sequences(frame_context_t *const ctx, const size_t num_sequences); static sequence_command_t decode_sequence(sequence_states_t *const state, const u8 *const src, - i64 *const offset); + i64 *const offset, + int lastSequence); static void decode_seq_table(FSE_dtable *const table, istream_t *const in, const seq_part_t type, const seq_mode_t mode); @@ -1114,7 +1115,7 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in, for (size_t i = 0; i < num_sequences; i++) { // Decode sequences one by one - sequences[i] = decode_sequence(&states, src, &bit_offset); + sequences[i] = decode_sequence(&states, src, &bit_offset, i==num_sequences-1); } if (bit_offset != 0) { @@ -1125,7 +1126,8 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in, // Decode a single sequence and update the state static sequence_command_t decode_sequence(sequence_states_t *const states, const u8 *const src, - i64 *const offset) { + i64 *const offset, + int lastSequence) { // "Each symbol is a code in its own context, which specifies Baseline and // Number_of_Bits to add. Codes are FSE compressed, and interleaved with raw // additional bits in the same bitstream." @@ -1160,7 +1162,7 @@ static sequence_command_t decode_sequence(sequence_states_t *const states, // Literals_Length_State is updated, followed by Match_Length_State, and // then Offset_State." 
// If the stream is complete don't read bits to update state - if (*offset != 0) { + if (!lastSequence) { FSE_update_state(&states->ll_table, &states->ll_state, src, offset); FSE_update_state(&states->ml_table, &states->ml_state, src, offset); FSE_update_state(&states->of_table, &states->of_state, src, offset); From 94a2f2791f313d27b6a2c0293971954cdd66035b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 31 May 2023 13:29:53 -0700 Subject: [PATCH 042/283] changed LLU suffix into ULL for Visual 2012 and lower both suffixes are supposed to be valid, but for some reason, Visual 2012 and lower only support ULL. --- lib/zstd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd.h b/lib/zstd.h index e94f1018970..148f112d184 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -228,7 +228,7 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) * for example to size a static array on stack. * Will produce constant value 0 if srcSize too large. */ -#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00LLU : 0xFF00FF00U) +#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U) #define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ /* ZSTD_isError() : From 7e09f07b325b6e2a95e11776f23ff97716b7b924 Mon Sep 17 00:00:00 2001 From: Gregor Jasny Date: Fri, 2 Jun 2023 12:34:56 +0200 Subject: [PATCH 043/283] Fix Intel Xcode builds with assembly When forcing the source file language to `C`, Xcode enforces the file to be compiled as `C` by appending `-x c` to the compiler command line. 
For now try to limit the damage and only enforce the language if the ASM and C compilers differ. Reproducer (CMake `3.26.4`, Xcode `14.3`): ``` cmake -S build/cmake -B _b -GXcode -DCMAKE_OSX_ARCHITECTURES=x86_64 cmake --build _b ``` Fix: #3622 --- build/cmake/lib/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 457b547978a..3cab017a7ce 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -105,7 +105,9 @@ endif () # Our assembly expects to be compiled by a C compiler, and is only enabled for # __GNUC__ compatible compilers. Otherwise all the ASM code is disabled by # macros. -set_source_files_properties(${Sources} PROPERTIES LANGUAGE C) +if(NOT CMAKE_ASM_COMPILER STREQUAL CMAKE_C_COMPILER) + set_source_files_properties(${Sources} PROPERTIES LANGUAGE C) +endif() macro (add_definition target var) if (NOT ("${${var}}" STREQUAL "")) From 1f83b7cfc459c2dbef00dc6276f790370e17aef6 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 5 Jun 2023 09:51:52 -0700 Subject: [PATCH 044/283] fix a minor inefficiency in compress_superblock and in `decodecorpus`: the specific case `nbSeq=127` can be represented using the 1-byte format. Note that both the 1-byte and the 2-bytes formats are valid to represent this case, so there was no "error", produced data remains valid, it's just that the 1-byte format is more efficient. fix #3667 Credit to @ip7z for finding this issue. --- doc/zstd_compression_format.md | 5 +++-- lib/compress/zstd_compress_superblock.c | 2 +- tests/decodecorpus.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md index 3843bf39055..2a69c4c30ae 100644 --- a/doc/zstd_compression_format.md +++ b/doc/zstd_compression_format.md @@ -655,8 +655,9 @@ Let's call its first byte `byte0`. Decompressed content is defined entirely as Literals Section content. 
The FSE tables used in `Repeat_Mode` aren't updated. - `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte. -- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes. -- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes. +- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0 - 0x80) << 8) + byte1`. Uses 2 bytes. + Note that the 2 bytes format fully overlaps the 1 byte format. +- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00`. Uses 3 bytes. __Symbol compression modes__ diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 638c4acbe70..dacaf85dbc2 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -180,7 +180,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, /* Sequences Header */ RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, dstSize_tooSmall, ""); - if (nbSeq < 0x7F) + if (nbSeq < 128) *op++ = (BYTE)nbSeq; else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index e48eccd6df7..a440ae38af2 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -825,7 +825,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, /* Sequences Header */ if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); - if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; + if (nbSeq < 128) *op++ = (BYTE)nbSeq; else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; From 3732a08f5b82ed87a744e65daa2f11f77dabe954 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 5 Jun 2023 16:03:00 -0700 Subject: [PATCH 045/283] fixed decoder behavior when nbSeqs==0 is encoded using 2 bytes The sequence section 
starts with a number, which tells how many sequences are present in the section. If this number is 0, the section automatically ends. The number 0 can be represented using the 1 byte or the 2 bytes formats. That's because the 2-bytes format fully overlaps the 1 byte format. However, when 0 is represented using the 2-bytes format, the decoder was expecting the sequence section to continue, and was looking for FSE tables, which is incorrect. Fixed this behavior, in both the reference decoder and the educational decoder. In practice, this behavior never happens, because the encoder will always select the 1-byte format to represent 0, since this is more efficient. Completed the fix with a new golden sample for tests, a clarification of the specification, and a decoder errata paragraph. --- doc/decompressor_errata.md | 23 ++++++++++++++++++++++ doc/educational_decoder/zstd_decompress.c | 13 ++++++------ doc/zstd_compression_format.md | 12 ++++++----- lib/common/zstd_internal.h | 4 ++-- lib/decompress/zstd_decompress_block.c | 10 +++++----- tests/golden-decompression/zeroSeq_2B.zst | Bin 0 -> 25 bytes 6 files changed, 44 insertions(+), 18 deletions(-) create mode 100644 tests/golden-decompression/zeroSeq_2B.zst diff --git a/doc/decompressor_errata.md b/doc/decompressor_errata.md index e170a62c155..83d4071cb4d 100644 --- a/doc/decompressor_errata.md +++ b/doc/decompressor_errata.md @@ -12,6 +12,26 @@ Each entry will contain: The document is in reverse chronological order, with the bugs that affect the most recent zstd decompressor versions listed first. 
+No sequence using the 2-bytes format +------------------------------------------------ + +**Last affected version**: v1.5.5 + +**Affected decompressor component(s)**: Library & CLI + +**Produced by the reference compressor**: No + +**Example Frame**: see zstd/tests/golden-decompression/zeroSeq_2B.zst + +The zstd decoder incorrectly expects FSE tables when there are 0 sequences present in the block +if the value 0 is encoded using the 2-bytes format. +Instead, it should immediately end the sequence section, and move on to next block. + +This situation was never generated by the reference compressor, +because representing 0 sequences with the 2-bytes format is inefficient +(the 1-byte format is always used in this case). + + Compressed block with a size of exactly 128 KB ------------------------------------------------ @@ -32,6 +52,7 @@ These blocks used to be disallowed by the spec up until spec version 0.3.2 when > A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block). + Compressed block with 0 literals and 0 sequences ------------------------------------------------ @@ -51,6 +72,7 @@ Additionally, these blocks were disallowed by the spec up until spec version 0.3 > A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block). + First block is RLE block ------------------------ @@ -72,6 +94,7 @@ block. 
https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L3527-L3535 + Tiny FSE Table & Block ---------------------- diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c index 1da7c528d23..921c8f54cd3 100644 --- a/doc/educational_decoder/zstd_decompress.c +++ b/doc/educational_decoder/zstd_decompress.c @@ -1018,12 +1018,7 @@ static size_t decode_sequences(frame_context_t *const ctx, istream_t *in, // This is a variable size field using between 1 and 3 bytes. Let's call its // first byte byte0." u8 header = IO_read_bits(in, 8); - if (header == 0) { - // "There are no sequences. The sequence section stops there. - // Regenerated content is defined entirely by literals section." - *sequences = NULL; - return 0; - } else if (header < 128) { + if (header < 128) { // "Number_of_Sequences = byte0 . Uses 1 byte." num_sequences = header; } else if (header < 255) { @@ -1034,6 +1029,12 @@ static size_t decode_sequences(frame_context_t *const ctx, istream_t *in, num_sequences = IO_read_bits(in, 16) + 0x7F00; } + if (num_sequences == 0) { + // "There are no sequences. The sequence section stops there." + *sequences = NULL; + return 0; + } + *sequences = malloc(num_sequences * sizeof(sequence_command_t)); if (!*sequences) { BAD_ALLOC(); diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md index 2a69c4c30ae..cd7308de196 100644 --- a/doc/zstd_compression_format.md +++ b/doc/zstd_compression_format.md @@ -16,7 +16,7 @@ Distribution of this document is unlimited. ### Version -0.3.9 (2023-03-08) +0.4.0 (2023-06-05) Introduction @@ -650,15 +650,16 @@ __`Number_of_Sequences`__ This is a variable size field using between 1 and 3 bytes. Let's call its first byte `byte0`. -- `if (byte0 == 0)` : there are no sequences. - The sequence section stops there. - Decompressed content is defined entirely as Literals Section content. 
- The FSE tables used in `Repeat_Mode` aren't updated. - `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte. - `if (byte0 < 255)` : `Number_of_Sequences = ((byte0 - 0x80) << 8) + byte1`. Uses 2 bytes. Note that the 2 bytes format fully overlaps the 1 byte format. - `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00`. Uses 3 bytes. +`if (Number_of_Sequences == 0)` : there are no sequences. + The sequence section stops immediately, + FSE tables used in `Repeat_Mode` aren't updated. + Block's decompressed content is defined solely by the Literals Section content. + __Symbol compression modes__ This is a single byte, defining the compression mode of each symbol type. @@ -1698,6 +1699,7 @@ or at least provide a meaningful error code explaining for which reason it canno Version changes --------------- +- 0.4.0 : fixed imprecise behavior for nbSeq==0, detected by Igor Pavlov - 0.3.9 : clarifications for Huffman-compressed literal sizes. - 0.3.8 : clarifications for Huffman Blocks and Huffman Tree descriptions. - 0.3.7 : clarifications for Repeat_Offsets, matching RFC8878 diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 1f942f27bf0..f7c57a028bf 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -366,13 +366,13 @@ typedef struct { /*! ZSTD_getcBlockSize() : * Provides the size of compressed block from block header `src` */ -/* Used by: decompress, fullbench (does not get its definition from here) */ +/* Used by: decompress, fullbench */ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr); /*! 
ZSTD_decodeSeqHeaders() : * decode sequence header from src */ -/* Used by: decompress, fullbench (does not get its definition from here) */ +/* Used by: zstd_decompress_block, fullbench */ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize); diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 5028a52f103..c63d06d6a9e 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -706,11 +706,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, /* SeqHead */ nbSeq = *ip++; - if (!nbSeq) { - *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); - return 1; - } if (nbSeq > 0x7F) { if (nbSeq == 0xFF) { RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); @@ -723,6 +718,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, } *nbSeqPtr = nbSeq; + if (nbSeq == 0) { + /* No sequence : section ends immediately */ + return (size_t)(ip - istart); + } + /* FSE table descriptors */ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); diff --git a/tests/golden-decompression/zeroSeq_2B.zst b/tests/golden-decompression/zeroSeq_2B.zst new file mode 100644 index 0000000000000000000000000000000000000000..f9f3520a6eb823709594cbe57df3c1b497984f48 GIT binary patch literal 25 gcmdPcs{faPp_PFl!y`2(Cto2vzbGd~k*k3L0BK Date: Mon, 12 Jun 2023 06:00:14 +0000 Subject: [PATCH 046/283] Bump actions/checkout from 3.5.2 to 3.5.3 Bumps [actions/checkout](https://github.com/actions/checkout) from 3.5.2 to 3.5.3. 
- [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/8e5e7e5ab8b370d6c329ec480221332ada57f0ab...c85c95e3d7251135ab7dc9ce3241c5835cc595a9) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/dev-long-tests.yml | 50 ++++++------- .github/workflows/dev-short-tests.yml | 74 +++++++++---------- .../workflows/publish-release-artifacts.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/windows-artifacts.yml | 2 +- 5 files changed, 65 insertions(+), 65 deletions(-) diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml index deef9f78066..6a0e338f0dd 100644 --- a/.github/workflows/dev-long-tests.yml +++ b/.github/workflows/dev-long-tests.yml @@ -15,7 +15,7 @@ jobs: make-all: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: make all run: make all @@ -26,7 +26,7 @@ jobs: DEVNULLRIGHTS: 1 READFROMBLOCKDEVICE: 1 steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: make test run: make test @@ -34,7 +34,7 @@ jobs: make-test-osx: runs-on: macos-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: OS-X test run: make test # make -c lib all doesn't work because of the fact that it's not a tty @@ -45,7 +45,7 @@ jobs: DEVNULLRIGHTS: 1 READFROMBLOCKDEVICE: 1 steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: 
actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: make test run: | sudo apt-get -qqq update @@ -55,21 +55,21 @@ jobs: no-intrinsics-fuzztest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: no intrinsics fuzztest run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest tsan-zstreamtest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: thread sanitizer zstreamtest run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream ubsan-zstreamtest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: undefined behavior sanitizer zstreamtest run: CC=clang make uasan-test-zstream @@ -77,7 +77,7 @@ jobs: tsan-fuzztest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: thread sanitizer fuzztest run: CC=clang make tsan-fuzztest @@ -85,7 +85,7 @@ jobs: big-tests-zstreamtest32: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: zstream tests in 32bit mode, with big tests run: | sudo apt-get -qqq update @@ -96,7 +96,7 @@ jobs: gcc-8-asan-ubsan-testzstd: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: gcc-8 + ASan + UBSan + Test Zstd # See https://askubuntu.com/a/1428822 run: | @@ -108,14 +108,14 @@ jobs: 
clang-asan-ubsan-testzstd: runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 - name: clang + ASan + UBSan + Test Zstd run: CC=clang make -j uasan-test-zstd Date: Tue, 13 Jun 2023 11:43:45 -0700 Subject: [PATCH 047/283] detect extraneous bytes in the Sequences section when nbSeq == 0. Reported by @ip7z --- lib/decompress/zstd_decompress_block.c | 2 ++ tests/cli-tests/decompression/detectErrors.sh | 11 +++++++++++ tests/playTests.sh | 5 +++++ 3 files changed, 18 insertions(+) create mode 100755 tests/cli-tests/decompression/detectErrors.sh diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index c63d06d6a9e..93947ba584f 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -720,6 +720,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, if (nbSeq == 0) { /* No sequence : section ends immediately */ + RETURN_ERROR_IF(ip != iend, corruption_detected, + "extraneous data present in the Sequences section"); return (size_t)(ip - istart); } diff --git a/tests/cli-tests/decompression/detectErrors.sh b/tests/cli-tests/decompression/detectErrors.sh new file mode 100755 index 00000000000..300cde36438 --- /dev/null +++ b/tests/cli-tests/decompression/detectErrors.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -e + +GOLDEN_DIR="$ZSTD_REPO_DIR/tests/golden-decompression-errors/" + +for file in "$GOLDEN_DIR"/*; do + zstd -t $file && die "should have detected an error" +done +exit 0 + diff --git a/tests/playTests.sh b/tests/playTests.sh index f51776170bd..05dc0042999 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -444,6 +444,11 @@ $DIFF -s tmp1 tmp touch tmp_empty zstd -d -o tmp2 "$TESTDIR/golden-decompression/empty-block.zst" $DIFF -s tmp2 tmp_empty + +zstd -t "$TESTDIR/golden-decompression/zeroSeq_2B.zst" + +zstd -t 
"$TESTDIR/golden-decompression-errors/zeroSeq_extraneous.zst" && die "invalid Sequences section should have been detected" + rm -f tmp* println "\n===> compress multiple files" From ba508070299b4ab7ae1e22b659557489122cdcd7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 14 Jun 2023 15:42:37 -0700 Subject: [PATCH 048/283] make the bitstream generate only 0-value bits after an overflow --- lib/common/bitstream.h | 67 ++++++++++++++++++++++++------------------ lib/common/compiler.h | 19 ++++++++++++ lib/common/mem.h | 9 ------ lib/legacy/zstd_v06.c | 9 ------ lib/legacy/zstd_v07.c | 9 ------ 5 files changed, 57 insertions(+), 56 deletions(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 72b0b3df227..374f2b68902 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -90,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); /*-******************************************** * bitStream decoding API (read backward) **********************************************/ +typedef size_t BitContainerType; typedef struct { - size_t bitContainer; + BitContainerType bitContainer; unsigned bitsConsumed; const char* ptr; const char* start; const char* limitPtr; } BIT_DStream_t; -typedef enum { BIT_DStream_unfinished = 0, - BIT_DStream_endOfBuffer = 1, - BIT_DStream_completed = 2, - BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ - /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... 
:( */ +typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */ + BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */ + BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */ + BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */ + } BIT_DStream_status; /* result of BIT_reloadDStream() */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); @@ -112,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); /* Start by invoking BIT_initDStream(). * A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType). * You can then retrieve bitFields stored into the local register, **in reverse order**. * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. 
@@ -162,7 +163,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, return 0; } -MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS) return _bzhi_u64(bitContainer, nbBits); @@ -267,22 +268,22 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si bitD->bitContainer = *(const BYTE*)(bitD->start); switch(srcSize) { - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); ZSTD_FALLTHROUGH; - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); ZSTD_FALLTHROUGH; - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); ZSTD_FALLTHROUGH; - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24; ZSTD_FALLTHROUGH; - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16; ZSTD_FALLTHROUGH; - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8; ZSTD_FALLTHROUGH; default: break; @@ -297,12 +298,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return 
srcSize; } -MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start) { return bitContainer >> start; } -MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits) { U32 const regMask = sizeof(bitContainer)*8 - 1; /* if start > regMask, bitstream is corrupted, and result is undefined */ @@ -325,7 +326,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c * On 32-bits, maxNbBits==24. * On 64-bits, maxNbBits==56. * @return : value extracted */ -MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { /* arbitrate between double-shift and shift+mask */ #if 1 @@ -348,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); } -MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; } @@ -357,7 +358,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) * Read (consume) next n bits from local register and update. * Pay attention to not read more than nbBits contained into local register. * @return : extracted value. 
*/ -MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBits(bitD, nbBits); BIT_skipBits(bitD, nbBits); @@ -375,16 +376,16 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) } /*! BIT_reloadDStreamFast() : - * Similar to BIT_reloadDStream(), but with two differences: - * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! - * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this - * point you must use BIT_reloadDStream() to reload. + * Simple variant of BIT_reloadDStream(), with two conditions: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 + * 2. bitD->ptr >= bitD->limitPtr + * These conditions guarantee that bitstream is in a valid state, + * and shifting the position of the look window is safe. */ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) { - if (UNLIKELY(bitD->ptr < bitD->limitPtr)) - return BIT_DStream_overflow; assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + assert(bitD->ptr >= bitD->limitPtr); bitD->ptr -= bitD->bitsConsumed >> 3; bitD->bitsConsumed &= 7; bitD->bitContainer = MEM_readLEST(bitD->ptr); @@ -393,22 +394,30 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) /*! BIT_reloadDStream() : * Refill `bitD` from buffer previously set in BIT_initDStream() . - * This function is safe, it guarantees it will not read beyond src buffer. + * This function is safe, it guarantees it will not never beyond src buffer. * @return : status of `BIT_DStream_t` internal register. 
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ -MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) { - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + /* note : once in overflow mode, a bitstream remains in this mode until it's reset */ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) { + static const BitContainerType zeroFilled = 0; + bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */ + /* overflow detected, erroneous scenario or end of stream: no update */ return BIT_DStream_overflow; + } + + assert(bitD->ptr >= bitD->start); if (bitD->ptr >= bitD->limitPtr) { return BIT_reloadDStreamFast(bitD); } if (bitD->ptr == bitD->start) { + /* reached end of bitStream => no update */ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; return BIT_DStream_completed; } - /* start < ptr < limitPtr */ + /* start < ptr < limitPtr => cautious update */ { U32 nbBytes = bitD->bitsConsumed >> 3; BIT_DStream_status result = BIT_DStream_unfinished; if (bitD->ptr - nbBytes < bitD->start) { diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 79e773c0fbe..1cde912f911 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -81,6 +81,25 @@ # define UNUSED_ATTR #endif +/* "soft" inline : + * The compiler is free to select if it's a good idea to inline or not. + * The main objective is to silence compiler warnings + * when a defined function in included but not used. + * + * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit. + * Updating the prefix is probably preferable, but requires a fairly large codemod, + * since this name is used everywhere. 
+ */ +#if defined(__GNUC__) +# define MEM_STATIC static __inline UNUSED_ATTR +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + /* force no inlining */ #ifdef _MSC_VER # define FORCE_NOINLINE static __declspec(noinline) diff --git a/lib/common/mem.h b/lib/common/mem.h index 98dd47a0476..096f4be519d 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -31,15 +31,6 @@ extern "C" { # include /* _byteswap_ulong */ # include /* _byteswap_* */ #endif -#if defined(__GNUC__) -# define MEM_STATIC static __inline __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif /*-************************************************************** * Basic Types diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index 175f7cc4224..ac9ae987c47 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -67,15 +67,6 @@ extern "C" { # include /* _byteswap_ulong */ # include /* _byteswap_* */ #endif -#if defined(__GNUC__) -# define MEM_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif /*-************************************************************** diff --git 
a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index 15dc3ef7994..b214ec08bdf 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -227,15 +227,6 @@ extern "C" { # include /* _byteswap_ulong */ # include /* _byteswap_* */ #endif -#if defined(__GNUC__) -# define MEM_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif /*-************************************************************** From 74c901bbedd4584190f0cd93d573cf7e014b76d1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 14 Jun 2023 16:32:51 -0700 Subject: [PATCH 049/283] fix : unused attribute for FORCE_INLINE functions fix2 : reloadDStreamFast is used by decompress4x2, modified the entry point, so that it works fine in this case too. --- lib/common/bitstream.h | 27 +++++++++++++++++++-------- lib/common/compiler.h | 18 +++++++++--------- lib/legacy/zstd_v04.c | 9 --------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 374f2b68902..a737f011cb5 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -375,23 +375,34 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) return value; } -/*! BIT_reloadDStreamFast() : +/*! BIT_reloadDStream_internal() : * Simple variant of BIT_reloadDStream(), with two conditions: - * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 - * 2. bitD->ptr >= bitD->limitPtr - * These conditions guarantee that bitstream is in a valid state, - * and shifting the position of the look window is safe. + * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8 + * 2. 
look window is valid after shifted down : bitD->ptr >= bitD->start */ -MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD) { assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); - assert(bitD->ptr >= bitD->limitPtr); bitD->ptr -= bitD->bitsConsumed >> 3; + assert(bitD->ptr >= bitD->start); bitD->bitsConsumed &= 7; bitD->bitContainer = MEM_readLEST(bitD->ptr); return BIT_DStream_unfinished; } +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + return BIT_reloadDStream_internal(bitD); +} + /*! BIT_reloadDStream() : * Refill `bitD` from buffer previously set in BIT_initDStream() . * This function is safe, it guarantees it will not never beyond src buffer. @@ -410,7 +421,7 @@ FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) assert(bitD->ptr >= bitD->start); if (bitD->ptr >= bitD->limitPtr) { - return BIT_reloadDStreamFast(bitD); + return BIT_reloadDStream_internal(bitD); } if (bitD->ptr == bitD->start) { /* reached end of bitStream => no update */ diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 1cde912f911..35b9c138793 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -51,12 +51,19 @@ # define WIN_CDECL #endif +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + /** * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant * parameters. 
They must be inlined for the compiler to eliminate the constant * branches. */ -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR /** * HINT_INLINE is used to help the compiler generate better code. It is *not* * used for "templates", so it can be tweaked based on the compilers @@ -71,14 +78,7 @@ #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 # define HINT_INLINE static INLINE_KEYWORD #else -# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR -#endif - -/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -#if defined(__GNUC__) -# define UNUSED_ATTR __attribute__((unused)) -#else -# define UNUSED_ATTR +# define HINT_INLINE FORCE_INLINE_TEMPLATE #endif /* "soft" inline : diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index 57be832bd32..fa65160bcc8 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -37,15 +37,6 @@ extern "C" { # include /* _byteswap_ulong */ # include /* _byteswap_* */ #endif -#if defined(__GNUC__) -# define MEM_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif /**************************************************************** From d9645327b3b6d18b04ac1dd0bc4346a2af87bb9b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 14 Jun 2023 20:03:39 -0700 Subject: [PATCH 050/283] fixed MEM_STATIC already defined in Linux Kernel mode --- contrib/linux-kernel/mem.h | 1 + lib/common/allocations.h | 2 +- lib/common/compiler.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/contrib/linux-kernel/mem.h 
b/contrib/linux-kernel/mem.h index a7231822b6e..2e91e7780c1 100644 --- a/contrib/linux-kernel/mem.h +++ b/contrib/linux-kernel/mem.h @@ -24,6 +24,7 @@ /*-**************************************** * Compiler specifics ******************************************/ +#undef MEM_STATIC /* may be already defined from common/compiler.h */ #define MEM_STATIC static inline /*-************************************************************** diff --git a/lib/common/allocations.h b/lib/common/allocations.h index a3153c4bac2..5e899550109 100644 --- a/lib/common/allocations.h +++ b/lib/common/allocations.h @@ -14,7 +14,7 @@ #define ZSTD_DEPS_NEED_MALLOC #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ -#include "mem.h" /* MEM_STATIC */ +#include "compiler.h" /* MEM_STATIC */ #define ZSTD_STATIC_LINKING_ONLY #include "../zstd.h" /* ZSTD_customMem */ diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 35b9c138793..bcaa575dda6 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -90,6 +90,7 @@ * Updating the prefix is probably preferable, but requires a fairly large codemod, * since this name is used everywhere. */ +#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */ #if defined(__GNUC__) # define MEM_STATIC static __inline UNUSED_ATTR #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) @@ -99,6 +100,7 @@ #else # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ #endif +#endif /* force no inlining */ #ifdef _MSC_VER From 02134fad123a26e17bcb48edc2868b5968ed76d5 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 16 Jun 2023 11:56:22 -0700 Subject: [PATCH 051/283] changed (partially) the decodeSequences flow logic this allows detecting overflow events without a checksum. 
--- lib/decompress/zstd_decompress_block.c | 160 ++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 15 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 93947ba584f..2d022004343 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1214,14 +1214,20 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +/** + * ZSTD_decodeSequence_old(): + * @p longOffsets : tells the decoder to reload more bit while decoding large offsets + * only used in 32-bit mode + * @return : Sequence (litL + matchL + offset) + */ FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +ZSTD_decodeSequence_old(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { seq_t seq; /* - * ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be - * loaded in one operation and extracted its fields by simply shifting or - * bit-extracting on aarch64. + * ZSTD_seqSymbol is a 64 bits wide structure. + * It can be loaded in one operation + * and its fields extracted by simply shifting or bit-extracting on aarch64. * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh * operations that cause performance drop. This can be avoided by using this * ZSTD_memcpy hack. @@ -1330,6 +1336,132 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) return seq; } +/** + * ZSTD_decodeSequence(): + * @p longOffsets : tells the decoder to reload more bit while decoding large offsets + * only used in 32-bit mode + * @return : Sequence (litL + matchL + offset) + */ +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq) +{ + seq_t seq; + /* + * ZSTD_seqSymbol is a 64 bits wide structure. 
+ * It can be loaded in one operation + * and its fields extracted by simply shifting or bit-extracting on aarch64. + * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh + * operations that cause performance drop. This can be avoided by using this + * ZSTD_memcpy hack. + */ +#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__)) + ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS; + ZSTD_seqSymbol* const llDInfo = &llDInfoS; + ZSTD_seqSymbol* const mlDInfo = &mlDInfoS; + ZSTD_seqSymbol* const ofDInfo = &ofDInfoS; + ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol)); + ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol)); + ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol)); +#else + const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; + const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; + const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; +#endif + seq.matchLength = mlDInfo->baseValue; + seq.litLength = llDInfo->baseValue; + { U32 const ofBase = ofDInfo->baseValue; + BYTE const llBits = llDInfo->nbAdditionalBits; + BYTE const mlBits = mlDInfo->nbAdditionalBits; + BYTE const ofBits = ofDInfo->nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + U16 const llNext = llDInfo->nextState; + U16 const mlNext = mlDInfo->nextState; + U16 const ofNext = ofDInfo->nextState; + U32 const llnbBits = llDInfo->nbBits; + U32 const mlnbBits = mlDInfo->nbBits; + U32 const ofnbBits = ofDInfo->nbBits; + + assert(llBits <= MaxLLBits); + assert(mlBits <= MaxMLBits); + assert(ofBits <= MaxOff); + /* + * As gcc has better branch and block analyzers, sometimes it is only + * valuable to mark likeliness for clang, it gives around 3-4% of + * performance. 
+ */ + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32); + ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + /* Always read extra bits, this keeps the logic simple, + * avoids branches, and avoids accidentally reading 0 bits. + */ + U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32; + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llDInfo->baseValue == 0); + if (LIKELY((ofBits == 0))) { + offset = seqState->prevOffset[ll0]; + seqState->prevOffset[1] = seqState->prevOffset[!ll0]; + seqState->prevOffset[0] = offset; + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + if (!isLastSeq) { + /* don't update FSE state for last Sequence */ + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ + BIT_reloadDStream(&seqState->DStream); + } + } + + return seq; +} + #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) { @@ -1420,7 +1552,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, /* decompress without overrunning litPtr begins */ { - seq_t sequence = 
ZSTD_decodeSequence(&seqState, isLongOffset); + seq_t sequence = ZSTD_decodeSequence_old(&seqState, isLongOffset); /* Align the decompression loop to 32 + 16 bytes. * * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression @@ -1495,7 +1627,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, if (UNLIKELY(!--nbSeq)) break; BIT_reloadDStream(&(seqState.DStream)); - sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + sequence = ZSTD_decodeSequence_old(&seqState, isLongOffset); } /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ @@ -1548,7 +1680,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, #endif for (; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + seq_t const sequence = ZSTD_decodeSequence_old(&seqState, isLongOffset); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); @@ -1647,8 +1779,8 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, # endif #endif - for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + for ( ; nbSeq ; nbSeq--) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); @@ -1658,15 +1790,13 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) - break; - BIT_reloadDStream(&(seqState.DStream)); } /* check if reached exact end */ DEBUGLOG(5, 
"ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); RETURN_ERROR_IF(nbSeq, corruption_detected, ""); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } } @@ -1763,7 +1893,7 @@ ZSTD_decompressSequencesLong_body( /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNblitBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) From 84e898a76c50aa31bd05b37a370c674250706254 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 16 Jun 2023 12:31:23 -0700 Subject: [PATCH 052/283] removed _old variant from splitLit --- lib/common/bitstream.h | 2 +- lib/decompress/zstd_decompress_block.c | 30 +++++++++++--------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index a737f011cb5..676044989c9 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -411,7 +411,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) { /* note : once in overflow mode, a bitstream remains in this mode until it's reset */ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) { + if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) { static const BitContainerType zeroFilled = 0; bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */ /* overflow detected, erroneous scenario or end of stream: no update */ diff --git a/lib/decompress/zstd_decompress_block.c 
b/lib/decompress/zstd_decompress_block.c index 2d022004343..24fc490b216 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -873,7 +873,7 @@ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, pt /* ZSTD_safecopyDstBeforeSrc(): * This version allows overlap with dst before src, or handles the non-overlap case with dst after src * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ -static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { +static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) { ptrdiff_t const diff = op - ip; BYTE* const oend = op + length; @@ -1530,7 +1530,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); /* Regen sequences */ if (nbSeq) { @@ -1552,7 +1552,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, /* decompress without overrunning litPtr begins */ { - seq_t sequence = ZSTD_decodeSequence_old(&seqState, isLongOffset); + seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); /* Align the decompression loop to 32 + 16 bytes. 
* * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression @@ -1626,15 +1626,15 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, op += oneSeqSize; if (UNLIKELY(!--nbSeq)) break; - BIT_reloadDStream(&(seqState.DStream)); - sequence = ZSTD_decodeSequence_old(&seqState, isLongOffset); + sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); } + DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)"); /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ if (nbSeq > 0) { const size_t leftoverLit = dctx->litBufferEnd - litPtr; - if (leftoverLit) - { + DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength); + if (leftoverLit) { RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); sequence.litLength -= leftoverLit; @@ -1643,8 +1643,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, litPtr = dctx->litExtraBuffer; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferLocation = ZSTD_not_in_dst; - { - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); @@ -1653,9 +1652,8 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); op += oneSeqSize; - if (--nbSeq) - BIT_reloadDStream(&(seqState.DStream)); } + nbSeq--; } } @@ -1679,8 +1677,8 
@@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, # endif #endif - for (; ; ) { - seq_t const sequence = ZSTD_decodeSequence_old(&seqState, isLongOffset); + for ( ; nbSeq ; nbSeq--) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); @@ -1690,16 +1688,14 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) - break; - BIT_reloadDStream(&(seqState.DStream)); } } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); RETURN_ERROR_IF(nbSeq, corruption_detected, ""); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } } From 33fca19dd4b8cc9d68feb3daa129297b31680e47 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 16 Jun 2023 15:32:07 -0700 Subject: [PATCH 053/283] changed ZSTD_decompressSequences_bodySplitLitBuffer() decoding loop to behave more like the regular decoding loop.
--- lib/decompress/zstd_decompress_block.c | 40 ++++++++++++-------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 24fc490b216..4a7d8ec407c 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1532,7 +1532,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); - /* Regen sequences */ + /* Literals are split between internal buffer & output buffer */ if (nbSeq) { seqState_t seqState; dctx->fseEntropy = 1; @@ -1551,8 +1551,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, BIT_DStream_completed < BIT_DStream_overflow); /* decompress without overrunning litPtr begins */ - { - seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); + { seq_t sequence; /* Align the decompression loop to 32 + 16 bytes. 
* * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression @@ -1614,20 +1613,19 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, #endif /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ - for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { - size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + for ( ; nbSeq; nbSeq--) { + sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); + if (litPtr + sequence.litLength > dctx->litBufferEnd) break; + { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif - if (UNLIKELY(ZSTD_isError(oneSeqSize))) - return oneSeqSize; - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) - break; - sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); - } + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + } } DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)"); /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ @@ -1657,8 +1655,8 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, } } - if (nbSeq > 0) /* there is remaining lit from extra buffer */ - { + if (nbSeq > 0) { + 
/* there is remaining lit from extra buffer */ #if defined(__GNUC__) && defined(__x86_64__) __asm__(".p2align 6"); @@ -1701,8 +1699,8 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, } /* last literal segment */ - if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ - { + if (dctx->litBufferLocation == ZSTD_split) { + /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ size_t const lastLLSize = litBufferEnd - litPtr; RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); if (op != NULL) { @@ -1713,13 +1711,13 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferLocation = ZSTD_not_in_dst; } + /* copy last literals from interal buffer */ { size_t const lastLLSize = litBufferEnd - litPtr; RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); op += lastLLSize; - } - } + } } return op-ostart; } From c60dcedcc91da9bb7550f237f79ee001ca6d1a75 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 16 Jun 2023 15:52:00 -0700 Subject: [PATCH 054/283] adapted long decoder to new decodeSequences removed older decodeSequences --- lib/decompress/zstd_decompress_block.c | 179 +++---------------------- 1 file changed, 21 insertions(+), 158 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 4a7d8ec407c..0d6c2e01103 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1214,128 +1214,6 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -/** - * ZSTD_decodeSequence_old(): - * @p longOffsets : tells the decoder to reload more bit while decoding large offsets - * only used in 32-bit 
mode - * @return : Sequence (litL + matchL + offset) - */ -FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence_old(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) -{ - seq_t seq; - /* - * ZSTD_seqSymbol is a 64 bits wide structure. - * It can be loaded in one operation - * and its fields extracted by simply shifting or bit-extracting on aarch64. - * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh - * operations that cause performance drop. This can be avoided by using this - * ZSTD_memcpy hack. - */ -#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__)) - ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS; - ZSTD_seqSymbol* const llDInfo = &llDInfoS; - ZSTD_seqSymbol* const mlDInfo = &mlDInfoS; - ZSTD_seqSymbol* const ofDInfo = &ofDInfoS; - ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol)); - ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol)); - ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol)); -#else - const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; - const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; - const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; -#endif - seq.matchLength = mlDInfo->baseValue; - seq.litLength = llDInfo->baseValue; - { U32 const ofBase = ofDInfo->baseValue; - BYTE const llBits = llDInfo->nbAdditionalBits; - BYTE const mlBits = mlDInfo->nbAdditionalBits; - BYTE const ofBits = ofDInfo->nbAdditionalBits; - BYTE const totalBits = llBits+mlBits+ofBits; - - U16 const llNext = llDInfo->nextState; - U16 const mlNext = mlDInfo->nextState; - U16 const ofNext = ofDInfo->nextState; - U32 const llnbBits = llDInfo->nbBits; - U32 const mlnbBits = mlDInfo->nbBits; - U32 const ofnbBits = ofDInfo->nbBits; - - assert(llBits <= MaxLLBits); - assert(mlBits <= 
MaxMLBits); - assert(ofBits <= MaxOff); - /* - * As gcc has better branch and block analyzers, sometimes it is only - * valuable to mark likeliness for clang, it gives around 3-4% of - * performance. - */ - - /* sequence */ - { size_t offset; - if (ofBits > 1) { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32); - ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { - /* Always read extra bits, this keeps the logic simple, - * avoids branches, and avoids accidentally reading 0 bits. - */ - U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32; - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); - offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } else { - U32 const ll0 = (llDInfo->baseValue == 0); - if (LIKELY((ofBits == 0))) { - offset = seqState->prevOffset[ll0]; - seqState->prevOffset[1] = seqState->prevOffset[!ll0]; - seqState->prevOffset[0] = offset; - } else { - offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); - { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } } } - seq.offset = offset; - } - - if (mlBits > 0) - seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); - - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - if (llBits > 0) - seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); - - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - - ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ - } - - return seq; -} - /** * ZSTD_decodeSequence(): * @p longOffsets : tells the decoder to reload more bit while decoding large offsets @@ -1754,11 +1632,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); assert(dst != NULL); - ZSTD_STATIC_ASSERT( - BIT_DStream_unfinished < BIT_DStream_completed && - BIT_DStream_endOfBuffer < BIT_DStream_completed && - BIT_DStream_completed < 
BIT_DStream_overflow); - #if defined(__GNUC__) && defined(__x86_64__) __asm__(".p2align 6"); __asm__("nop"); @@ -1787,9 +1660,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, } /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - RETURN_ERROR_IF(nbSeq, corruption_detected, ""); - DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); + assert(nbSeq == 0); RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } @@ -1801,8 +1672,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); op += lastLLSize; - } - } + } } return op-ostart; } @@ -1886,20 +1756,17 @@ ZSTD_decompressSequencesLong_body( ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNblitBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) - { + if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) { /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ const size_t leftoverLit = dctx->litBufferEnd - litPtr; if (leftoverLit) @@ -1912,21 +1779,21 @@ ZSTD_decompressSequencesLong_body( litPtr = dctx->litExtraBuffer; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferLocation = ZSTD_not_in_dst; - oneSeqSize = ZSTD_execSequence(op, oend,
sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); - sequences[seqNb & STORED_SEQS_MASK] = sequence; - op += oneSeqSize; - } + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } } else { /* lit buffer is either wholly contained in first or second split, or not split at all*/ - oneSeqSize = dctx->litBufferLocation == ZSTD_split ? + size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? 
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) @@ -1940,17 +1807,15 @@ ZSTD_decompressSequencesLong_body( op += oneSeqSize; } } - RETURN_ERROR_IF(seqNblitBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) - { + if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) { const size_t leftoverLit = dctx->litBufferEnd - litPtr; - if (leftoverLit) - { + if (leftoverLit) { RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); sequence->litLength -= leftoverLit; @@ -1959,8 +1824,7 @@ ZSTD_decompressSequencesLong_body( litPtr = dctx->litExtraBuffer; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferLocation = ZSTD_not_in_dst; - { - size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); @@ -1988,8 +1852,7 @@ ZSTD_decompressSequencesLong_body( } /* last literal segment */ - if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */ - { + if (dctx->litBufferLocation == 
ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */ size_t const lastLLSize = litBufferEnd - litPtr; RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); if (op != NULL) { From c123e69ad087cea5b779ce2a26b0845810783d12 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 16 Jun 2023 16:24:48 -0700 Subject: [PATCH 055/283] fixed static analyzer false positive regarding @sequence initialization make a mock initialization to please the tool --- lib/decompress/zstd_decompress_block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 0d6c2e01103..c90536ad716 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1429,7 +1429,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, BIT_DStream_completed < BIT_DStream_overflow); /* decompress without overrunning litPtr begins */ - { seq_t sequence; + { seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one interation) */ /* Align the decompression loop to 32 + 16 bytes. 
* * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression From e4aeaebc201ba49fec50b087aeb15343c63712e5 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 20 Jun 2023 08:34:26 -0700 Subject: [PATCH 056/283] fixed incorrect test in Win32 pthread wrapper reported by @Banzai24-yht in #3683 --- .gitignore | 1 + lib/common/threading.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index a136ea39496..e95a8d07cd8 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ build-* _codelite/ _zstdbench/ .clang_complete +compile_flags.txt *.idea *.swp .DS_Store diff --git a/lib/common/threading.c b/lib/common/threading.c index ca155b9b9db..25bb8b98104 100644 --- a/lib/common/threading.c +++ b/lib/common/threading.c @@ -73,10 +73,12 @@ int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, ZSTD_thread_params_t thread_param; (void)unused; + if (thread==NULL) return -1; + *thread = NULL; + thread_param.start_routine = start_routine; thread_param.arg = arg; thread_param.initialized = 0; - *thread = NULL; /* Setup thread initialization synchronization */ if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) { @@ -91,7 +93,7 @@ int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, /* Spawn thread */ *thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL); - if (!thread) { + if (*thread==NULL) { ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); return errno; @@ -137,6 +139,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread) int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) { + assert(mutex != NULL); *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); if (!*mutex) return 1; @@ -145,6 +148,7 @@ int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t con int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) { + 
assert(mutex != NULL); if (!*mutex) return 0; { @@ -156,6 +160,7 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) { + assert(cond != NULL); *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); if (!*cond) return 1; @@ -164,6 +169,7 @@ int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) { + assert(cond != NULL); if (!*cond) return 0; { From f307493711b74ddfdbf9da711f75c4e07bcc93a3 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Wed, 21 Jun 2023 12:16:26 -0400 Subject: [PATCH 057/283] Update FreeBSD CI images to latest supported releases --- .cirrus.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 27ca65e8d29..047e77f69fa 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -2,8 +2,8 @@ task: name: FreeBSD (shortest) freebsd_instance: matrix: - image_family: freebsd-13-0 - image_family: freebsd-12-2 + image_family: freebsd-13-2 + image_family: freebsd-12-4 install_script: pkg install -y gmake coreutils script: | MOREFLAGS="-Werror" gmake -j all From c6a888c073a0a6693026e67e8db3813ba6b78850 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 21 Jun 2023 18:52:50 -0700 Subject: [PATCH 058/283] suppress false error message in LDM mode --- lib/compress/zstd_compress.c | 10 +++------- lib/compress/zstd_compress_internal.h | 3 +-- lib/compress/zstdmt_compress.c | 5 +---- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 19446b63ff3..48cee4db74b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4609,19 +4609,15 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) } } -size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) +void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, 
rawSeq* seq, size_t nbSeq) { - RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, - "wrong cctx stage"); - RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable, - parameter_unsupported, - "incompatible with ldm"); + assert(cctx->stage == ZSTDcs_init); + assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable); cctx->externSeqStore.seq = seq; cctx->externSeqStore.size = nbSeq; cctx->externSeqStore.capacity = nbSeq; cctx->externSeqStore.pos = 0; cctx->externSeqStore.posInSequence = 0; - return 0; } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 10f68d010ec..ac8dfb71a41 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -1467,11 +1467,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); * This cannot be used when long range matching is enabled. * Zstd will use these sequences, and pass the literals to a secondary block * compressor. - * @return : An error code on failure. * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory * access and data corruption. 
*/ -size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); +void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); /** ZSTD_cycleLog() : * condition for correct operation : hashLog > 1 */ diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 6786075569b..add99d769be 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -601,11 +601,8 @@ static void ZSTDMT_serialState_update(serialState_t* serialState, ZSTD_pthread_mutex_unlock(&serialState->mutex); if (seqStore.size > 0) { - size_t const err = ZSTD_referenceExternalSequences( - jobCCtx, seqStore.seq, seqStore.size); + ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size); assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable); - assert(!ZSTD_isError(err)); - (void)err; } } From b1a30e2b4a69e6fcca9c2a6f9d4e43e8e3b243c8 Mon Sep 17 00:00:00 2001 From: Nidhi Jaju Date: Mon, 26 Jun 2023 00:07:30 +0000 Subject: [PATCH 059/283] hide asm functions on apple platforms --- lib/common/portability_macros.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/common/portability_macros.h b/lib/common/portability_macros.h index 8fd6ea82d19..e50314a78e4 100644 --- a/lib/common/portability_macros.h +++ b/lib/common/portability_macros.h @@ -68,6 +68,8 @@ /* Mark the internal assembly functions as hidden */ #ifdef __ELF__ # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func +#elif defined(__APPLE__) +# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func #else # define ZSTD_HIDE_ASM_FUNCTION(func) #endif From 1a6278c82d3b35cd8abd82db7bc5dc0907b838dd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Jun 2023 05:59:38 +0000 Subject: [PATCH 060/283] Bump github/codeql-action from 2.3.2 to 2.20.1 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.3.2 to 2.20.1. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/f3feb00acb00f31a6f60280e6ace9ca31d91c76a...f6e388ebf0efc915c6c5b165b019ee61a6746a38) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index d9aea8adbf4..42579d25065 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f3feb00acb00f31a6f60280e6ace9ca31d91c76a # tag=v2.3.2 + uses: github/codeql-action/upload-sarif@f6e388ebf0efc915c6c5b165b019ee61a6746a38 # tag=v2.20.1 with: sarif_file: results.sarif From 2c97f5dbedb0b581c733eb658665a9cc886eccef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Jun 2023 05:59:43 +0000 Subject: [PATCH 061/283] Bump ossf/scorecard-action from 2.1.3 to 2.2.0 Bumps [ossf/scorecard-action](https://github.com/ossf/scorecard-action) from 2.1.3 to 2.2.0. - [Release notes](https://github.com/ossf/scorecard-action/releases) - [Changelog](https://github.com/ossf/scorecard-action/blob/main/RELEASE.md) - [Commits](https://github.com/ossf/scorecard-action/compare/80e868c13c90f172d68d1f4501dee99e2479f7af...08b4669551908b1024bb425080c797723083c031) --- updated-dependencies: - dependency-name: ossf/scorecard-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index d9aea8adbf4..4edb140fe70 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -32,7 +32,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@80e868c13c90f172d68d1f4501dee99e2479f7af # tag=v2.1.3 + uses: ossf/scorecard-action@08b4669551908b1024bb425080c797723083c031 # tag=v2.2.0 with: results_file: results.sarif results_format: sarif From 065ea9274fbbf794616df86a6376cd1c7f0dd5ca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 05:37:59 +0000 Subject: [PATCH 062/283] Bump github/codeql-action from 2.20.1 to 2.20.3 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.20.1 to 2.20.3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/f6e388ebf0efc915c6c5b165b019ee61a6746a38...46ed16ded91731b2df79a2893d3aea8e9f03b5c4) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 0dd1e59fa88..5737abc8d6a 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f6e388ebf0efc915c6c5b165b019ee61a6746a38 # tag=v2.20.1 + uses: github/codeql-action/upload-sarif@46ed16ded91731b2df79a2893d3aea8e9f03b5c4 # tag=v2.20.3 with: sarif_file: results.sarif From a1b9a5ad0e1a10bea2315132bef21de3ed9cebc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niko=20Hoffr=C3=A9n?= Date: Wed, 19 Jul 2023 21:49:43 +0300 Subject: [PATCH 063/283] Fix typographical error in README.md --- CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f5e747ae1b9..47f5bb8f0f3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -171,8 +171,8 @@ who want earlier signal. | Cirrus CI | Used for testing on FreeBSD | https://github.com/marketplace/cirrus-ci/ | `.cirrus.yml` | | Circle CI | Historically was used to provide faster signal,
but we may be able to migrate these to Github Actions | https://circleci.com/docs/2.0/getting-started/#setting-up-circleci
https://youtu.be/Js3hMUsSZ2c
https://circleci.com/docs/2.0/enable-checks/ | `.circleci/config.yml` | -Note: the instructions linked above mostly cover how to set up a repository with CI from scratch. -The general idea should be the same for setting up CI on your fork of zstd, but you may have to +Note: the instructions linked above mostly cover how to set up a repository with CI from scratch. +The general idea should be the same for setting up CI on your fork of zstd, but you may have to follow slightly different steps. In particular, please ignore any instructions related to setting up config files (since zstd already has configs for each of these services). @@ -216,7 +216,7 @@ will typically not be stable enough to obtain reliable benchmark results. If you hands on a desktop, this is usually a better scenario. Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to -do a little more work to ensure that you are in fact measuring the changes you've made not and +do a little more work to ensure that you are in fact measuring the changes you've made and not noise. Here are some things you can do to make your benchmarks more stable: 1. 
The most simple thing you can do to drastically improve the stability of your benchmark is From 55ff3e4e17ea42a7c3726e51945c483a18d8c4c8 Mon Sep 17 00:00:00 2001 From: Jacob Greenfield Date: Tue, 18 Jul 2023 10:48:46 -0400 Subject: [PATCH 064/283] Save one byte on the frame epilogue --- lib/compress/zstd_compress.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 48cee4db74b..209f3b0eeac 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -5192,8 +5192,9 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) if (cctx->stage != ZSTDcs_ending) { /* write one last empty block, make it the "last" block */ U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); - MEM_writeLE32(op, cBlockHeader24); + ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3); + RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue"); + MEM_writeLE24(op, cBlockHeader24); op += ZSTD_blockHeaderSize; dstCapacity -= ZSTD_blockHeaderSize; } From de6b46dfc80d950a32176c7eca79bb229d47f501 Mon Sep 17 00:00:00 2001 From: Gianfranco Costamagna Date: Fri, 7 Jul 2023 09:26:30 +0200 Subject: [PATCH 065/283] Update fileio.c: fix build failure with enabled LTO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reasons when LTO is enabled, the compiler complains about statbuf variable not being correctly initialized, even though the variable has an assert != NULL just few lines below (FIO_getDictFileStat) This is the fixed build failure: x86_64-linux-gnu-gcc -g -O2 -ffile-prefix-map=/<>=. 
-flto=auto -ffat-lto-objects -fstack-protector-strong -Wformat -Werror=format-security -fdebug-prefix-map=/<>=/usr/src/libzstd-1.5.5+dfsg2-1 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings -Wredundant-decls -Wmissing-prototypes -Wc++-compat -g -Werror -Wa,--noexecstack -Wdate-time -D_FORTIFY_SOURCE=2 -DXXH_NAMESPACE=ZSTD_ -DDEBUGLEVEL=1 -DZSTD_LEGACY_SUPPORT=5 -DZSTD_MULTITHREAD -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS -DZSTD_LEGACY_SUPPORT=5 -c -MT obj/conf_086c46a51a716b674719b8acb8484eb8/zstdcli_trace.o -MMD -MP -MF obj/conf_086c46a51a716b674719b8acb8484eb8/zstdcli_trace.d -o obj/conf_086c46a51a716b674719b8acb8484eb8/zstdcli_trace.o zstdcli_trace.c In function ‘UTIL_isRegularFileStat’, inlined from ‘UTIL_getFileSizeStat’ at util.c:524:10, inlined from ‘FIO_createDResources’ at fileio.c:2230:30: util.c:209:12: error: ‘statbuf.st_mode’ may be used uninitialized [-Werror=maybe-uninitialized] 209 | return S_ISREG(statbuf->st_mode) != 0; | ^ fileio.c: In function ‘FIO_createDResources’: fileio.c:2223:12: note: ‘statbuf’ declared here 2223 | stat_t statbuf; | ^ lto1: all warnings being treated as errors --- programs/fileio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/fileio.c b/programs/fileio.c index 84a0f48f782..2172325423e 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2222,6 +2222,7 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable; stat_t statbuf; dRess_t ress; + memset(&statbuf, 0, sizeof(statbuf)); memset(&ress, 0, sizeof(ress)); FIO_getDictFileStat(dictFileName, &statbuf); From 4d267f3d4f9f85eecf98d1a2353408b8e840f1a3 Mon Sep 17 00:00:00 2001 From: void0red <30990023+void0red@users.noreply.github.com> 
Date: Fri, 21 Jul 2023 12:17:03 +0800 Subject: [PATCH 066/283] fileio_asyncio: handle malloc fails in AIO_ReadPool_create --- programs/fileio_asyncio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/fileio_asyncio.c b/programs/fileio_asyncio.c index fe9cca95d1f..dbf0c756414 100644 --- a/programs/fileio_asyncio.c +++ b/programs/fileio_asyncio.c @@ -551,6 +551,7 @@ ReadPoolCtx_t* AIO_ReadPool_create(const FIO_prefs_t* prefs, size_t bufferSize) AIO_IOPool_init(&ctx->base, prefs, AIO_ReadPool_executeReadJob, bufferSize); ctx->coalesceBuffer = (U8*) malloc(bufferSize * 2); + if(!ctx->coalesceBuffer) EXM_THROW(100, "Allocation error : not enough memory"); ctx->srcBuffer = ctx->coalesceBuffer; ctx->srcBufferLoaded = 0; ctx->completedJobsCount = 0; From e99d554903643e8dda22d664bbba62e8a9d5b0b0 Mon Sep 17 00:00:00 2001 From: jysh1214 Date: Tue, 1 Aug 2023 10:43:33 +0800 Subject: [PATCH 067/283] Fixed typo --- tests/playTests.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/playTests.sh b/tests/playTests.sh index 05dc0042999..bdbd00142cb 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -16,18 +16,18 @@ datagen() { } zstd() { - if [ -z "$EXEC_PREFIX" ]; then + if [ -z "$EXE_PREFIX" ]; then "$ZSTD_BIN" "$@" else - "$EXEC_PREFIX" "$ZSTD_BIN" "$@" + "$EXE_PREFIX" "$ZSTD_BIN" "$@" fi } sudoZstd() { - if [ -z "$EXEC_PREFIX" ]; then + if [ -z "$EXE_PREFIX" ]; then sudo "$ZSTD_BIN" "$@" else - sudo "$EXEC_PREFIX" "$ZSTD_BIN" "$@" + sudo "$EXE_PREFIX" "$ZSTD_BIN" "$@" fi } From 78dbba76b81ea1d8713900b57bc5d5f5f43bf74b Mon Sep 17 00:00:00 2001 From: Quentin Boswank Date: Sun, 13 Aug 2023 19:44:15 +0200 Subject: [PATCH 068/283] Updated Makefiles for full MSYS2 and Cygwin installation and testing support. They are Linux-like environments under Windows and have all the tools needed to support staged installation and testing. Beware: this only affects the make build system. 
--- Makefile | 2 +- lib/Makefile | 2 +- programs/Makefile | 2 +- tests/Makefile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 72a9480aab1..fd95c38901d 100644 --- a/Makefile +++ b/Makefile @@ -151,7 +151,7 @@ clean: #------------------------------------------------------------------------------ # make install is validated only for Linux, macOS, Hurd and some BSD targets #------------------------------------------------------------------------------ -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku AIX)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT CYGWIN_NT Haiku AIX)) HOST_OS = POSIX diff --git a/lib/Makefile b/lib/Makefile index a4cf61ab10e..6d349a3b48c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -249,7 +249,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for below listed environments #----------------------------------------------------------------------------- -ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX)) +ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT CYGWIN_NT)) lib: libzstd.pc diff --git a/programs/Makefile b/programs/Makefile index 8507abef3f0..be83c249336 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -346,7 +346,7 @@ include $(wildcard $(DEPFILES)) #----------------------------------------------------------------------------- # make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX)) +ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly 
SunOS Haiku AIX MSYS_NT CYGWIN_NT)) HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) EGREP_OPTIONS ?= diff --git a/tests/Makefile b/tests/Makefile index 778c7d67597..c31e7500558 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -263,7 +263,7 @@ clean: # valgrind tests validated only for some posix platforms #---------------------------------------------------------------------------------- UNAME := $(shell uname) -ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS AIX)) +ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS AIX CYGWIN_NT)) HOST_OS = POSIX .PHONY: test-valgrind From 969e54f26ee5e03677d47b6449be02fe48e6d349 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 11 Aug 2023 17:09:07 -0700 Subject: [PATCH 069/283] Improve dual license wording in README We are licensed under BSD or GPLv2. It is clear in our headers, but not in the README. Fixes #3717 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f91e68fdb10..89857bf9bee 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ targeting real-time compression scenarios at zlib-level and better compression r It's backed by a very fast entropy stage, provided by [Huff0 and FSE library](https://github.com/Cyan4973/FiniteStateEntropy). Zstandard's format is stable and documented in [RFC8878](https://datatracker.ietf.org/doc/html/rfc8878). Multiple independent implementations are already available. -This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) and [GPLv2](COPYING) licensed **C** library, +This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) OR [GPLv2](COPYING) licensed **C** library, and a command line utility producing and decoding `.zst`, `.gz`, `.xz` and `.lz4` files. 
Should your project require another programming language, a list of known ports and bindings is provided on [Zstandard homepage](https://facebook.github.io/zstd/#other-languages). @@ -213,7 +213,7 @@ Zstandard is considered safe for production environments. ## License -Zstandard is dual-licensed under [BSD](LICENSE) and [GPLv2](COPYING). +Zstandard is dual-licensed under [BSD](LICENSE) OR [GPLv2](COPYING). ## Contributing From 5f5bdc1e5d23544391df1c47cec3a69b96a09f5b Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 16 Aug 2023 12:08:52 -0400 Subject: [PATCH 070/283] Easy: Move Helper Functions Up --- lib/compress/zstd_cwksp.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h index cfe808f1505..9aeed19438e 100644 --- a/lib/compress/zstd_cwksp.h +++ b/lib/compress/zstd_cwksp.h @@ -636,6 +636,15 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { ZSTD_cwksp_assert_internal_consistency(ws); } +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + /** * The provided workspace takes ownership of the buffer [start, start+size). 
* Any existing values in the workspace are ignored (the previously managed @@ -680,15 +689,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); } -MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { - return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); -} - -MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { - return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) - + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); -} - MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { return ws->allocFailed; } From 9987d2f5942a7701b388eec4307be71a121e5652 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 16 Aug 2023 12:09:12 -0400 Subject: [PATCH 071/283] Unpoison Workspace Memory Before Freeing to Custom Free MSAN is hooked into the system malloc, but when the user provides a custom allocator, it may not provide the same cleansing behavior. So if we leave memory poisoned and return it to the user's allocator, where it is re-used elsewhere, our poisoning can blow up in some other context. 
--- lib/compress/zstd_cwksp.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h index 9aeed19438e..a3efc56e523 100644 --- a/lib/compress/zstd_cwksp.h +++ b/lib/compress/zstd_cwksp.h @@ -676,6 +676,11 @@ MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { void *ptr = ws->workspace; DEBUGLOG(4, "cwksp: freeing workspace"); +#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE) + if (ptr != NULL && customMem.customFree != NULL) { + __msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws)); + } +#endif ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); ZSTD_customFree(ptr, customMem); } From a07d7c4e29f9329a1c98fbecc2e54ed6b663caef Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 16 Aug 2023 10:43:39 -0700 Subject: [PATCH 072/283] added ZSTD_decompressDCtx() benchmark option to fullbench useful to compare the difference between ZSTD_decompress and ZSTD_decompressDCtx(). 
--- tests/fullbench.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/fullbench.c b/tests/fullbench.c index 0391107b993..c8f0c0af1f9 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -138,7 +138,14 @@ static size_t local_ZSTD_decompress(const void* src, size_t srcSize, return ZSTD_decompress(dst, dstSize, buff2, g_cSize); } -static ZSTD_DCtx* g_zdc = NULL; +static ZSTD_DCtx* g_zdc = NULL; /* will be initialized within benchMem */ +static size_t local_ZSTD_decompressDCtx(const void* src, size_t srcSize, + void* dst, size_t dstSize, + void* buff2) +{ + (void)src; (void)srcSize; + return ZSTD_decompressDCtx(g_zdc, dst, dstSize, buff2, g_cSize); +} #ifndef ZSTD_DLL_IMPORT @@ -452,6 +459,9 @@ static int benchMem(unsigned benchNb, case 3: benchFunction = local_ZSTD_compress_freshCCtx; benchName = "compress_freshCCtx"; break; + case 4: + benchFunction = local_ZSTD_decompressDCtx; benchName = "decompressDCtx"; + break; #ifndef ZSTD_DLL_IMPORT case 11: benchFunction = local_ZSTD_compressContinue; benchName = "compressContinue"; @@ -551,6 +561,9 @@ static int benchMem(unsigned benchNb, case 3: payload = &cparams; break; + case 4: + g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel); + break; #ifndef ZSTD_DLL_IMPORT case 11: payload = &cparams; From a02d81f944c24aca2ccca2f16a6a96474f97e18b Mon Sep 17 00:00:00 2001 From: Mustafa UZUN Date: Sat, 19 Aug 2023 15:44:53 +0300 Subject: [PATCH 073/283] fix: ZSTD_BUILD_DECOMPRESSION message --- build/cmake/lib/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 3cab017a7ce..cf1252e1964 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -13,7 +13,7 @@ set(CMAKE_INCLUDE_CURRENT_DIR TRUE) option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON) option(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" ON) option(ZSTD_BUILD_COMPRESSION "BUILD COMPRESSION 
MODULE" ON) -option(ZSTD_BUILD_DECOMPRESSION "BUILD DECOMPRESSION MODUEL" ON) +option(ZSTD_BUILD_DECOMPRESSION "BUILD DECOMPRESSION MODULE" ON) option(ZSTD_BUILD_DICTBUILDER "BUILD DICTBUILDER MODULE" ON) option(ZSTD_BUILD_DEPRECATED "BUILD DEPRECATED MODULE" OFF) From db0ae65436c6aa977b60b8a7fd7d4522a3cd14ae Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Aug 2023 05:51:56 +0000 Subject: [PATCH 074/283] Bump github/codeql-action from 2.20.3 to 2.21.4 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.20.3 to 2.21.4. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/46ed16ded91731b2df79a2893d3aea8e9f03b5c4...a09933a12a80f87b87005513f0abb1494c27a716) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 5737abc8d6a..0d28381181c 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@46ed16ded91731b2df79a2893d3aea8e9f03b5c4 # tag=v2.20.3 + uses: github/codeql-action/upload-sarif@a09933a12a80f87b87005513f0abb1494c27a716 # tag=v2.21.4 with: sarif_file: results.sarif From bd02c9be6e3708c6dd53f4df1f4dc13d29441e89 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 21 Aug 2023 11:33:29 -0700 Subject: [PATCH 075/283] No longer reject dictionaries with literals maxSymbolValue < 255 We already have logic in our Huffman encoder to validate Huffman tables with missing symbols. We use this for higher compression levels to re-use the previous blocks statistics, or when the dictionaries table has zero-weighted symbols. This check was leftover as an oversight from before we added validation for Huffman tables. I validated that the `dictionary_loader` fuzzer has coverage of every line in the `ZSTD_loadCEntropy()` function to validate that it is correctly testing this function. --- lib/compress/zstd_compress.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 209f3b0eeac..36c1f99e800 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4904,11 +4904,10 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, /* We only set the loaded table as valid if it contains all non-zero * weights. Otherwise, we set it to check */ - if (!hasZeroWeights) + if (!hasZeroWeights && maxSymbolValue == 255) bs->entropy.huf.repeatMode = HUF_repeat_valid; RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); dictPtr += hufHeaderSize; } From ecb86d82868d60517453151127b229c96ff89fec Mon Sep 17 00:00:00 2001 From: "Dmitry V. 
Levin" Date: Tue, 8 Aug 2023 08:00:00 +0000 Subject: [PATCH 076/283] zdictlib: fix prototype mismatch Fix the following warnings reported by the compiler when ZDICTLIB_STATIC_API is not defined to ZDICTLIB_API: lib/dictBuilder/cover.c:1122:21: warning: redeclaration of 'ZDICT_optimizeTrainFromBuffer_cover' with different visibility (old visibility preserved) lib/dictBuilder/cover.c:736:21: warning: redeclaration of 'ZDICT_trainFromBuffer_cover' with different visibility (old visibility +preserved) lib/dictBuilder/fastcover.c:549:1: warning: redeclaration of 'ZDICT_trainFromBuffer_fastCover' with different visibility (old visibility preserved) lib/dictBuilder/fastcover.c:618:1: warning: redeclaration of 'ZDICT_optimizeTrainFromBuffer_fastCover' with different visibility (old visibility preserved) --- lib/dictBuilder/cover.c | 4 ++-- lib/dictBuilder/fastcover.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 9e5e7d5b55d..e7fcfd2099b 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -733,7 +733,7 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, return tail; } -ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( +ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover( void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t parameters) @@ -1119,7 +1119,7 @@ static void COVER_tryParameters(void *opaque) free(freqs); } -ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( +ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover( void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_cover_params_t* parameters) diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c index 46bba0120b0..a958eb337f1 100644 --- a/lib/dictBuilder/fastcover.c +++ b/lib/dictBuilder/fastcover.c @@ 
-545,7 +545,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, } -ZDICTLIB_API size_t +ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, @@ -614,7 +614,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, } -ZDICTLIB_API size_t +ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover( void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, From c27fa399042f466080e79bb4fd8a4871bc0bcf28 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 24 Aug 2023 16:33:42 -0700 Subject: [PATCH 077/283] Work around nullptr-with-nonzero-offset warning See comment. --- lib/decompress/zstd_decompress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 94eb95151b5..ccfd84fa42d 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1548,6 +1548,12 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->stage = ZSTDds_getFrameHeaderSize; dctx->processedCSize = 0; dctx->decodedSize = 0; + /* Set to non-null because ZSTD_prefetchMatch() may end up doing addition + * with this value for corrupted frames. However, it then just passes the + * pointer to PREFETCH_L1(), which doesn't require valid pointers. But, + * if it is NULL we get nullptr-with-nonzero-offset UBSAN warnings. + */ + dctx->previousDstEnd = ""; dctx->previousDstEnd = NULL; dctx->prefixStart = NULL; dctx->virtualStart = NULL; From 396ef5b434e5e7f15773a7495f374a99a6377778 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 24 Aug 2023 14:41:21 -0700 Subject: [PATCH 078/283] Fix & refactor Huffman repeat tables for dictionaries The Huffman repeat mode checker assumed that the CTable was zeroed in the region `[maxSymbolValue + 1, 256)`. 
This assumption didn't hold for tables built in the dictionaries, because it didn't go through the same codepath. Since this code was originally written, we added a header to the CTable that specifies the `tableLog`. Add `maxSymbolValue` to that header, and check that the table's `maxSymbolValue` is at least the block's `maxSymbolValue`. This solution is cleaner because we write this header for every CTable we build, so it can't be missed in any code path. Credit to OSS-Fuzz --- lib/common/huf.h | 15 ++++++++- lib/compress/huf_compress.c | 61 ++++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 18 deletions(-) diff --git a/lib/common/huf.h b/lib/common/huf.h index 73d1ee56543..99bf85d6f4e 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -197,9 +197,22 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void /** HUF_getNbBitsFromCTable() : * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX - * Note 1 : is not inlined, as HUF_CElt definition is private */ + * Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0 + * Note 2 : is not inlined, as HUF_CElt definition is private + */ U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); +typedef struct { + BYTE tableLog; + BYTE maxSymbolValue; + BYTE unused[sizeof(size_t) - 2]; +} HUF_CTableHeader; + +/** HUF_readCTableHeader() : + * @returns The header from the CTable specifying the tableLog and the maxSymbolValue. + */ +HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable); + /* * HUF_decompress() does the following: * 1. 
select the decompression algorithm (X1, X2) based on pre-computed heuristics diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 29871877a7f..3fe25789603 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -220,6 +220,25 @@ static void HUF_setValue(HUF_CElt* elt, size_t value) } } +HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable) +{ + HUF_CTableHeader header; + ZSTD_memcpy(&header, ctable, sizeof(header)); + return header; +} + +static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue) +{ + HUF_CTableHeader header; + HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header)); + ZSTD_memset(&header, 0, sizeof(header)); + assert(tableLog < 256); + header.tableLog = (BYTE)tableLog; + assert(maxSymbolValue < 256); + header.maxSymbolValue = (BYTE)maxSymbolValue; + ZSTD_memcpy(ctable, &header, sizeof(header)); +} + typedef struct { HUF_CompressWeightsWksp wksp; BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ @@ -237,6 +256,9 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp)); + assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue); + assert(HUF_readCTableHeader(CTable).tableLog == huffLog); + /* check conditions */ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); @@ -283,7 +305,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); - CTable[0] = tableLog; + *maxSymbolValuePtr = nbSymbols - 1; + + HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr); /* Prepare base value per rank */ { U32 n, nextRankStart = 0; @@ -315,7 +339,6 @@ size_t HUF_readCTable (HUF_CElt* CTable, 
unsigned* maxSymbolValuePtr, const void { U32 n; for (n=0; n HUF_readCTableHeader(CTable).maxSymbolValue) + return 0; return (U32)HUF_getNbBits(ct[symbolValue]); } @@ -723,7 +748,8 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */ for (n=0; nCTable + 1, maxSymbolValue+1)); } - /* Zero unused symbols in CTable, so we can check it for validity */ - { - size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue); - size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt); - ZSTD_memset(table->CTable + ctableSize, 0, unusedSize); - } /* Write table description header */ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, From 253873220f26c0fd43aef740751355f91f40b750 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Sat, 26 Aug 2023 00:40:35 +0200 Subject: [PATCH 079/283] Fixed zstd cmake shared build on windows --- build/cmake/programs/CMakeLists.txt | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt index 58d998e4275..6a816586623 100644 --- a/build/cmake/programs/CMakeLists.txt +++ b/build/cmake/programs/CMakeLists.txt @@ -32,7 +32,17 @@ if (MSVC) set(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc) endif () -add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/fileio_asyncio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PROGRAMS_DIR}/zstdcli_trace.c ${PlatformDependResources}) +set(ZSTD_PROGRAM_SRCS ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c + ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c + ${PROGRAMS_DIR}/fileio_asyncio.c ${PROGRAMS_DIR}/benchfn.c + ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c + 
${PROGRAMS_DIR}/dibio.c ${PROGRAMS_DIR}/zstdcli_trace.c + ${PlatformDependResources}) +if (MSVC AND ZSTD_PROGRAMS_LINK_SHARED) + list(APPEND ZSTD_PROGRAM_SRCS ${LIBRARY_DIR}/common/pool.c ${LIBRARY_DIR}/common/threading.c) +endif () + +add_executable(zstd ${ZSTD_PROGRAM_SRCS}) target_link_libraries(zstd ${PROGRAMS_ZSTD_LINK_TARGET}) if (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)") target_link_libraries(zstd rt) @@ -75,7 +85,9 @@ if (UNIX) ${CMAKE_CURRENT_BINARY_DIR}/zstdless.1 DESTINATION "${MAN_INSTALL_DIR}") - add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/fileio_asyncio.c) + add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c + ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c + ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/fileio_asyncio.c) target_link_libraries(zstd-frugal ${PROGRAMS_ZSTD_LINK_TARGET}) set_property(TARGET zstd-frugal APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT;ZSTD_NOTRACE") endif () From 839c7939e825d9a6a24eea4122b5cfd4ab8b5243 Mon Sep 17 00:00:00 2001 From: klausholstjacobsen Date: Sun, 3 Sep 2023 10:10:23 +0200 Subject: [PATCH 080/283] Added qnx in the posix test section of platform.h --- programs/platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/platform.h b/programs/platform.h index 18a3587bfe2..43c5dc9694d 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -89,7 +89,7 @@ extern "C" { */ # elif !defined(_WIN32) \ && ( defined(__unix__) || defined(__unix) \ - || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) ) + || defined(_QNX_SOURCE) || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) ) # if defined(__linux__) || defined(__linux) || defined(__CYGWIN__) # ifndef _POSIX_C_SOURCE From e0e309f27cf73f407f77cfce485203636678a46a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 05:09:02 +0000 
Subject: [PATCH 081/283] Bump actions/checkout from 3.5.3 to 4.0.0 Bumps [actions/checkout](https://github.com/actions/checkout) from 3.5.3 to 4.0.0. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/c85c95e3d7251135ab7dc9ce3241c5835cc595a9...3df4ab11eba7bda6032a0b82a6bb43b11571feac) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/dev-long-tests.yml | 50 ++++++------- .github/workflows/dev-short-tests.yml | 74 +++++++++---------- .../workflows/publish-release-artifacts.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/windows-artifacts.yml | 2 +- 5 files changed, 65 insertions(+), 65 deletions(-) diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml index 6a0e338f0dd..34ace919c80 100644 --- a/.github/workflows/dev-long-tests.yml +++ b/.github/workflows/dev-long-tests.yml @@ -15,7 +15,7 @@ jobs: make-all: runs-on: ubuntu-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: make all run: make all @@ -26,7 +26,7 @@ jobs: DEVNULLRIGHTS: 1 READFROMBLOCKDEVICE: 1 steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: make test run: make test @@ -34,7 +34,7 @@ jobs: make-test-osx: runs-on: macos-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: OS-X test run: make test # make -c lib all doesn't work because of the fact that it's not a tty @@ -45,7 +45,7 @@ jobs: DEVNULLRIGHTS: 
1 READFROMBLOCKDEVICE: 1 steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: make test run: | sudo apt-get -qqq update @@ -55,21 +55,21 @@ jobs: no-intrinsics-fuzztest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: no intrinsics fuzztest run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest tsan-zstreamtest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: thread sanitizer zstreamtest run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream ubsan-zstreamtest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: undefined behavior sanitizer zstreamtest run: CC=clang make uasan-test-zstream @@ -77,7 +77,7 @@ jobs: tsan-fuzztest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: thread sanitizer fuzztest run: CC=clang make tsan-fuzztest @@ -85,7 +85,7 @@ jobs: big-tests-zstreamtest32: runs-on: ubuntu-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: zstream tests in 32bit mode, with big tests run: | sudo apt-get -qqq update @@ -96,7 +96,7 @@ jobs: gcc-8-asan-ubsan-testzstd: runs-on: ubuntu-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - 
name: gcc-8 + ASan + UBSan + Test Zstd # See https://askubuntu.com/a/1428822 run: | @@ -108,14 +108,14 @@ jobs: clang-asan-ubsan-testzstd: runs-on: ubuntu-latest steps: - - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3 + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3 - name: clang + ASan + UBSan + Test Zstd run: CC=clang make -j uasan-test-zstd Date: Mon, 11 Sep 2023 05:09:05 +0000 Subject: [PATCH 082/283] Bump actions/upload-artifact from 3.1.2 to 3.1.3 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.2 to 3.1.3. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/0b7f8abb1508181956e8e162db84b466c27e18ce...a8a3f3ad30e3422c9c7b888a15615d19a852ae32) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/dev-long-tests.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/windows-artifacts.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml index 6a0e338f0dd..23d6fd11665 100644 --- a/.github/workflows/dev-long-tests.yml +++ b/.github/workflows/dev-long-tests.yml @@ -290,7 +290,7 @@ jobs: dry-run: false sanitizer: ${{ matrix.sanitizer }} - name: Upload Crash - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3.1.2 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # tag=v3.1.3 if: failure() && steps.build.outcome == 'success' with: name: ${{ matrix.sanitizer }}-artifacts diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 0d28381181c..8ec59172502 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -51,7 +51,7 @@ jobs: # 
Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3.1.2 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # tag=v3.1.3 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml index aec722bc587..262847d548c 100644 --- a/.github/workflows/windows-artifacts.yml +++ b/.github/workflows/windows-artifacts.yml @@ -52,7 +52,7 @@ jobs: cd .. - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # tag=v3 with: path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip From d55ebb5718a1c7eaff65a720932aa628ccf4f66e Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Tue, 12 Sep 2023 19:12:19 +0200 Subject: [PATCH 083/283] [pzstd]: Fix `DESTDIR` handling to allow setting `BINDIR` Currently, setting `BINDIR` and `DESTDIR` separately is not possible, so the command below fails, as BINDIR is set explicitly: $ make -j80 install PREFIX=/usr EPREFIX=/usr BINDIR=/usr/bin SBINDIR=/usr/sbin LIBEXECDIR=/usr/libexec SYSCONFDIR=/etc SHAREDSTATEDIR=/var LOCALSTATEDIR=/var LIBDIR=/usr/lib INCLUDEDIR=/usr/include DATAROOTDIR=/usr/share DATADIR=/usr/share INFODIR=/usr/share/info LOCALEDIR=/usr/share/locale MANDIR=/usr/share/man DOCDIR=/usr/share/doc/zstd DESTDIR=/dev/shm/bee-pmenzel/zstd/zstd-1.5.5-0/image -C contrib/pzstd DESTDIR=/dev/shm/bee-pmenzel/zstd/zstd-1.5.5-0/image make: Entering directory '/dev/shm/bee-pmenzel/zstd/zstd-1.5.5-0/source/contrib/pzstd' CFLAGS=" -I../../lib -I../../lib/common -I../../programs -I.
-DNDEBUG -O3 -Wall -Wextra -Wno-deprecated-declarations " LDFLAGS=" -O3 -Wall -Wextra -pedantic " make -C ../../lib libzstd.a make[1]: Entering directory '/dev/shm/bee-pmenzel/zstd/zstd-1.5.5-0/source/lib' make[1]: Leaving directory '/dev/shm/bee-pmenzel/zstd/zstd-1.5.5-0/source/lib' g++ main.o ../../programs/util.o Options.o Pzstd.o SkippableFrame.o ../../lib/libzstd.a -O3 -Wall -Wextra -pedantic -pthread -o pzstd install -d -m 755 /usr/bin/ install -m 755 pzstd /usr/bin/pzstd install: cannot create regular file '/usr/bin/pzstd': Permission denied make: *** [Makefile:116: install] Error 1 So, do not prefix `BINDIR` with `DESTDIR`, and adapt all paths for installation. This is more common, and, for example, `programs/Makefile` does the same. Fixes: 8b4e84249b ("[pzstd] Fix Makefile") --- contrib/pzstd/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile index e62f8e873c5..9604eb2438c 100644 --- a/contrib/pzstd/Makefile +++ b/contrib/pzstd/Makefile @@ -10,7 +10,7 @@ # Standard variables for installation DESTDIR ?= PREFIX ?= /usr/local -BINDIR := $(DESTDIR)$(PREFIX)/bin +BINDIR := $(PREFIX)/bin ZSTDDIR = ../../lib PROGDIR = ../../programs @@ -112,12 +112,12 @@ check: .PHONY: install install: PZSTD_CPPFLAGS += -DNDEBUG install: pzstd$(EXT) - install -d -m 755 $(BINDIR)/ - install -m 755 pzstd$(EXT) $(BINDIR)/pzstd$(EXT) + install -d -m 755 $(DESTDIR)$(BINDIR)/ + install -m 755 pzstd$(EXT) $(DESTDIR)$(BINDIR)/pzstd$(EXT) .PHONY: uninstall uninstall: - $(RM) $(BINDIR)/pzstd$(EXT) + $(RM) $(DESTDIR)$(BINDIR)/pzstd$(EXT) # Targets for many different builds .PHONY: all From b69d06a8102f0e04cde0bda2e34984099a0dfba4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 27 Aug 2023 16:12:06 -0700 Subject: [PATCH 084/283] add include guards alleviate risks of double inclusion (typically via transitive includes) --- lib/libzstd.mk | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/libzstd.mk 
b/lib/libzstd.mk index 2c47ecdfa06..ce6e1137519 100644 --- a/lib/libzstd.mk +++ b/lib/libzstd.mk @@ -8,6 +8,10 @@ # You may select, at your option, one of the above-listed licenses. # ################################################################ +# Ensure the file is not included twice +ifndef LIBZSTD_MK_INCLUDED +LIBZSTD_MK_INCLUDED := 1 + ################################################################## # Input Variables ################################################################## @@ -223,3 +227,5 @@ endif # BUILD_DIR ZSTD_SUBDIR := $(LIBZSTD)/common $(LIBZSTD)/compress $(LIBZSTD)/decompress $(LIBZSTD)/dictBuilder $(LIBZSTD)/legacy $(LIBZSTD)/deprecated vpath %.c $(ZSTD_SUBDIR) vpath %.S $(ZSTD_SUBDIR) + +endif # LIBZSTD_MK_INCLUDED From 4edfaa93b7631e5fcb2911869ab77c833d73d142 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 27 Aug 2023 16:24:59 -0700 Subject: [PATCH 085/283] default targets of lib/ and programs/ have different names avoid risks on overlapping in case of include --- programs/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index be83c249336..99e004c9456 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -15,8 +15,9 @@ # zstd-decompress : decompressor-only version of zstd # ########################################################################## -.PHONY: default -default: zstd-release +# default target (when runing `make` with no argument) +.PHONY: zstd-release +zstd-release: LIBZSTD := ../lib From feaa8ac50d4e0299f652a436e72cc64f9b504c38 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 27 Aug 2023 16:28:56 -0700 Subject: [PATCH 086/283] renamed STATLIB into STATICLIB for improved clarity --- lib/Makefile | 38 ++++++++++++++++++++------------------ lib/libzstd.mk | 2 ++ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 6d349a3b48c..9591cff0f1e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,6 +8,9 @@ 
# You may select, at your option, one of the above-listed licenses. # ################################################################ +# default target (when runing `make` with no argument) +lib-release: + # Modules ZSTD_LIB_COMPRESSION ?= 1 ZSTD_LIB_DECOMPRESSION ?= 1 @@ -54,12 +57,11 @@ VERSION := $(ZSTD_VERSION) # Note: by default, the static library is built single-threaded and dynamic library is built # multi-threaded. It is possible to force multi or single threaded builds by appending # -mt or -nomt to the build target (like lib-mt for multi-threaded, lib-nomt for single-threaded). -.PHONY: default -default: lib-release + CPPFLAGS_DYNLIB += -DZSTD_MULTITHREAD # dynamic library build defaults to multi-threaded LDFLAGS_DYNLIB += -pthread -CPPFLAGS_STATLIB += # static library build defaults to single-threaded +CPPFLAGS_STATICLIB += # static library build defaults to single-threaded ifeq ($(findstring GCC,$(CCVER)),GCC) @@ -91,7 +93,7 @@ all: lib .PHONY: libzstd.a # must be run every time -libzstd.a: CPPFLAGS += $(CPPFLAGS_STATLIB) +libzstd.a: CPPFLAGS += $(CPPFLAGS_STATICLIB) SET_CACHE_DIRECTORY = \ +$(MAKE) --no-print-directory $@ \ @@ -109,19 +111,19 @@ libzstd.a: else # BUILD_DIR is defined -ZSTD_STATLIB_DIR := $(BUILD_DIR)/static -ZSTD_STATLIB := $(ZSTD_STATLIB_DIR)/libzstd.a -ZSTD_STATLIB_OBJ := $(addprefix $(ZSTD_STATLIB_DIR)/,$(ZSTD_LOCAL_OBJ)) -$(ZSTD_STATLIB): ARFLAGS = rcs -$(ZSTD_STATLIB): | $(ZSTD_STATLIB_DIR) -$(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ) +ZSTD_STATICLIB_DIR := $(BUILD_DIR)/static +ZSTD_STATICLIB := $(ZSTD_STATICLIB_DIR)/libzstd.a +ZSTD_STATICLIB_OBJ := $(addprefix $(ZSTD_STATICLIB_DIR)/,$(ZSTD_LOCAL_OBJ)) +$(ZSTD_STATICLIB): ARFLAGS = rcs +$(ZSTD_STATICLIB): | $(ZSTD_STATICLIB_DIR) +$(ZSTD_STATICLIB): $(ZSTD_STATICLIB_OBJ) # Check for multithread flag at target execution time $(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\ @echo compiling multi-threaded static library $(LIBVER),\ @echo compiling single-threaded static library $(LIBVER)) 
$(AR) $(ARFLAGS) $@ $^ -libzstd.a: $(ZSTD_STATLIB) +libzstd.a: $(ZSTD_STATICLIB) cp -f $< $@ endif @@ -182,14 +184,14 @@ lib : libzstd.a libzstd # make does not consider implicit pattern rule for .PHONY target %-mt : CPPFLAGS_DYNLIB := -DZSTD_MULTITHREAD -%-mt : CPPFLAGS_STATLIB := -DZSTD_MULTITHREAD +%-mt : CPPFLAGS_STATICLIB := -DZSTD_MULTITHREAD %-mt : LDFLAGS_DYNLIB := -pthread %-mt : % @echo multi-threaded build completed %-nomt : CPPFLAGS_DYNLIB := %-nomt : LDFLAGS_DYNLIB := -%-nomt : CPPFLAGS_STATLIB := +%-nomt : CPPFLAGS_STATICLIB := %-nomt : % @echo single-threaded build completed @@ -206,23 +208,23 @@ $(ZSTD_DYNLIB_DIR)/%.o : %.c $(ZSTD_DYNLIB_DIR)/%.d | $(ZSTD_DYNLIB_DIR) @echo CC $@ $(COMPILE.c) $(DEPFLAGS) $(ZSTD_DYNLIB_DIR)/$*.d $(OUTPUT_OPTION) $< -$(ZSTD_STATLIB_DIR)/%.o : %.c $(ZSTD_STATLIB_DIR)/%.d | $(ZSTD_STATLIB_DIR) +$(ZSTD_STATICLIB_DIR)/%.o : %.c $(ZSTD_STATICLIB_DIR)/%.d | $(ZSTD_STATICLIB_DIR) @echo CC $@ - $(COMPILE.c) $(DEPFLAGS) $(ZSTD_STATLIB_DIR)/$*.d $(OUTPUT_OPTION) $< + $(COMPILE.c) $(DEPFLAGS) $(ZSTD_STATICLIB_DIR)/$*.d $(OUTPUT_OPTION) $< $(ZSTD_DYNLIB_DIR)/%.o : %.S | $(ZSTD_DYNLIB_DIR) @echo AS $@ $(COMPILE.S) $(OUTPUT_OPTION) $< -$(ZSTD_STATLIB_DIR)/%.o : %.S | $(ZSTD_STATLIB_DIR) +$(ZSTD_STATICLIB_DIR)/%.o : %.S | $(ZSTD_STATICLIB_DIR) @echo AS $@ $(COMPILE.S) $(OUTPUT_OPTION) $< MKDIR ?= mkdir -$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATLIB_DIR): +$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATICLIB_DIR): $(MKDIR) -p $@ -DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATLIB_OBJ:.o=.d) +DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATICLIB_OBJ:.o=.d) $(DEPFILES): include $(wildcard $(DEPFILES)) diff --git a/lib/libzstd.mk b/lib/libzstd.mk index ce6e1137519..fb3c36265bf 100644 --- a/lib/libzstd.mk +++ b/lib/libzstd.mk @@ -9,6 +9,7 @@ # ################################################################ # Ensure the file is not included twice +# Note : must be included after setting the default target ifndef LIBZSTD_MK_INCLUDED 
LIBZSTD_MK_INCLUDED := 1 @@ -64,6 +65,7 @@ VOID ?= /dev/null NUM_SYMBOL := \# # define silent mode as default (verbose mode with V=1 or VERBOSE=1) +# Note : must be defined _after_ the default target $(V)$(VERBOSE).SILENT: # When cross-compiling from linux to windows, From f4dbfce79cb2b82fb496fcd2518ecd3315051b7d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 27 Aug 2023 18:35:41 -0700 Subject: [PATCH 087/283] define LIB_SRCDIR and LIB_BINDIR --- lib/libzstd.mk | 26 ++++++++++-------- programs/Makefile | 8 +++--- tests/Makefile | 64 ++++++++++++++++++++++----------------------- tests/fuzz/Makefile | 53 ++++++++++++++++++------------------- 4 files changed, 75 insertions(+), 76 deletions(-) diff --git a/lib/libzstd.mk b/lib/libzstd.mk index fb3c36265bf..a308a6ef6c9 100644 --- a/lib/libzstd.mk +++ b/lib/libzstd.mk @@ -8,6 +8,9 @@ # You may select, at your option, one of the above-listed licenses. # ################################################################ +# This included Makefile provides the following variables : +# LIB_SRCDIR, LIB_BINDIR + # Ensure the file is not included twice # Note : must be included after setting the default target ifndef LIBZSTD_MK_INCLUDED @@ -17,8 +20,9 @@ LIBZSTD_MK_INCLUDED := 1 # Input Variables ################################################################## -# Zstd lib directory -LIBZSTD ?= ./ +# By default, library's directory is same as this included makefile +LIB_SRCDIR ?= $(dir $(realpath $(lastword $(MAKEFILE_LIST)))) +LIB_BINDIR ?= $(LIB_SRCDIR) # ZSTD_LIB_MINIFY is a helper variable that # configures a bunch of other variables to space-optimized defaults.
@@ -75,7 +79,7 @@ $(V)$(VERBOSE).SILENT: TARGET_SYSTEM ?= $(OS) # Version numbers -LIBVER_SRC := $(LIBZSTD)/zstd.h +LIBVER_SRC := $(LIB_SRCDIR)/zstd.h LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` @@ -142,14 +146,14 @@ ifeq ($(HAVE_COLORNEVER), 1) endif GREP = grep $(GREP_OPTIONS) -ZSTD_COMMON_FILES := $(sort $(wildcard $(LIBZSTD)/common/*.c)) -ZSTD_COMPRESS_FILES := $(sort $(wildcard $(LIBZSTD)/compress/*.c)) -ZSTD_DECOMPRESS_FILES := $(sort $(wildcard $(LIBZSTD)/decompress/*.c)) -ZSTD_DICTBUILDER_FILES := $(sort $(wildcard $(LIBZSTD)/dictBuilder/*.c)) -ZSTD_DEPRECATED_FILES := $(sort $(wildcard $(LIBZSTD)/deprecated/*.c)) +ZSTD_COMMON_FILES := $(sort $(wildcard $(LIB_SRCDIR)/common/*.c)) +ZSTD_COMPRESS_FILES := $(sort $(wildcard $(LIB_SRCDIR)/compress/*.c)) +ZSTD_DECOMPRESS_FILES := $(sort $(wildcard $(LIB_SRCDIR)/decompress/*.c)) +ZSTD_DICTBUILDER_FILES := $(sort $(wildcard $(LIB_SRCDIR)/dictBuilder/*.c)) +ZSTD_DEPRECATED_FILES := $(sort $(wildcard $(LIB_SRCDIR)/deprecated/*.c)) ZSTD_LEGACY_FILES := -ZSTD_DECOMPRESS_AMD64_ASM_FILES := $(sort $(wildcard $(LIBZSTD)/decompress/*_amd64.S)) +ZSTD_DECOMPRESS_AMD64_ASM_FILES := $(sort $(wildcard $(LIB_SRCDIR)/decompress/*_amd64.S)) ifneq ($(ZSTD_NO_ASM), 0) CPPFLAGS += -DZSTD_DISABLE_ASM @@ -197,7 +201,7 @@ endif ifneq ($(ZSTD_LEGACY_SUPPORT), 0) ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0) - ZSTD_LEGACY_FILES += $(shell ls $(LIBZSTD)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') + ZSTD_LEGACY_FILES += $(shell ls $(LIB_SRCDIR)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') endif endif CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) @@ -226,7 +230,7 @@ ifeq ($(HAVE_HASH),0) endif endif # 
BUILD_DIR -ZSTD_SUBDIR := $(LIBZSTD)/common $(LIBZSTD)/compress $(LIBZSTD)/decompress $(LIBZSTD)/dictBuilder $(LIBZSTD)/legacy $(LIBZSTD)/deprecated +ZSTD_SUBDIR := $(LIB_SRCDIR)/common $(LIB_SRCDIR)/compress $(LIB_SRCDIR)/decompress $(LIB_SRCDIR)/dictBuilder $(LIB_SRCDIR)/legacy $(LIB_SRCDIR)/deprecated vpath %.c $(ZSTD_SUBDIR) vpath %.S $(ZSTD_SUBDIR) diff --git a/programs/Makefile b/programs/Makefile index 99e004c9456..6cd5c1eee55 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -16,12 +16,10 @@ # ########################################################################## # default target (when runing `make` with no argument) -.PHONY: zstd-release zstd-release: -LIBZSTD := ../lib - -include $(LIBZSTD)/libzstd.mk +LIBZSTD_MK_DIR = ../lib +include $(LIBZSTD_MK_DIR)/libzstd.mk ifeq ($(shell $(CC) -v 2>&1 | $(GREP) -c "gcc version "), 1) ALIGN_LOOP = -falign-loops=32 @@ -224,7 +222,7 @@ zstd-noxz : zstd ## zstd-dll: zstd executable linked to dynamic library libzstd (must have same version) .PHONY: zstd-dll -zstd-dll : LDFLAGS+= -L$(LIBZSTD) +zstd-dll : LDFLAGS+= -L$(LIB_BINDIR) zstd-dll : LDLIBS += -lzstd zstd-dll : ZSTDLIB_LOCAL_SRC = xxhash.c pool.c threading.c zstd-dll : zstd diff --git a/tests/Makefile b/tests/Makefile index c31e7500558..740bb59593b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -20,23 +20,21 @@ # zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode # ########################################################################## -LIBZSTD = ../lib - ZSTD_LEGACY_SUPPORT ?= 0 DEBUGLEVEL ?= 2 export DEBUGLEVEL # transmit value to sub-makefiles -include $(LIBZSTD)/libzstd.mk +LIBZSTD_MK_DIR := ../lib +include $(LIBZSTD_MK_DIR)/libzstd.mk -ZSTDDIR = $(LIBZSTD) PRGDIR = ../programs PYTHON ?= python3 TESTARTEFACT := versionsTest DEBUGFLAGS += -g -Wno-c++-compat -CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ - -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \ +CPPFLAGS += -I$(LIB_SRCDIR) 
-I$(LIB_SRCDIR)/common -I$(LIB_SRCDIR)/compress \ + -I$(LIB_SRCDIR)/dictBuilder -I$(LIB_SRCDIR)/deprecated -I$(PRGDIR) \ -DZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY=1 ZSTDCOMMON_FILES := $(sort $(ZSTD_COMMON_FILES)) @@ -46,15 +44,15 @@ ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) ZDICT_FILES := $(sort $(ZSTD_DICTBUILDER_FILES)) ZSTD_F1 := $(sort $(wildcard $(ZSTD_FILES))) -ZSTD_OBJ1 := $(subst $(ZSTDDIR)/common/,zstdm_,$(ZSTD_F1)) -ZSTD_OBJ2 := $(subst $(ZSTDDIR)/compress/,zstdc_,$(ZSTD_OBJ1)) -ZSTD_OBJ3 := $(subst $(ZSTDDIR)/decompress/,zstdd_,$(ZSTD_OBJ2)) +ZSTD_OBJ1 := $(subst $(LIB_SRCDIR)/common/,zstdm_,$(ZSTD_F1)) +ZSTD_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,zstdc_,$(ZSTD_OBJ1)) +ZSTD_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,zstdd_,$(ZSTD_OBJ2)) ZSTD_OBJ4 := $(ZSTD_OBJ3:.c=.o) ZSTD_OBJECTS := $(ZSTD_OBJ4:.S=.o) -ZSTDMT_OBJ1 := $(subst $(ZSTDDIR)/common/,zstdmt_m_,$(ZSTD_F1)) -ZSTDMT_OBJ2 := $(subst $(ZSTDDIR)/compress/,zstdmt_c_,$(ZSTDMT_OBJ1)) -ZSTDMT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,zstdmt_d_,$(ZSTDMT_OBJ2)) +ZSTDMT_OBJ1 := $(subst $(LIB_SRCDIR)/common/,zstdmt_m_,$(ZSTD_F1)) +ZSTDMT_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,zstdmt_c_,$(ZSTDMT_OBJ1)) +ZSTDMT_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,zstdmt_d_,$(ZSTDMT_OBJ2)) ZSTDMT_OBJ4 := $(ZSTDMT_OBJ3:.c=.o) ZSTDMT_OBJECTS := $(ZSTDMT_OBJ4:.S=.o) @@ -100,38 +98,38 @@ zstd zstd32 zstd-nolegacy zstd-dll: .PHONY: libzstd libzstd : - $(MAKE) -C $(ZSTDDIR) libzstd MOREFLAGS+="$(DEBUGFLAGS)" + $(MAKE) -C $(LIB_SRCDIR) libzstd MOREFLAGS+="$(DEBUGFLAGS)" %-dll : libzstd -%-dll : LDFLAGS += -L$(ZSTDDIR) -lzstd +%-dll : LDFLAGS += -L$(LIB_BINDIR) -lzstd -$(ZSTDDIR)/libzstd.a : - $(MAKE) -C $(ZSTDDIR) libzstd.a +$(LIB_BINDIR)/libzstd.a : + $(MAKE) -C $(LIB_SRCDIR) libzstd.a -zstdm_%.o : $(ZSTDDIR)/common/%.c +zstdm_%.o : $(LIB_SRCDIR)/common/%.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ -zstdc_%.o : $(ZSTDDIR)/compress/%.c +zstdc_%.o : $(LIB_SRCDIR)/compress/%.c $(CC) -c $(CPPFLAGS) 
$(CFLAGS) $< -o $@ -zstdd_%.o : $(ZSTDDIR)/decompress/%.c +zstdd_%.o : $(LIB_SRCDIR)/decompress/%.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ -zstdd_%.o : $(ZSTDDIR)/decompress/%.S +zstdd_%.o : $(LIB_SRCDIR)/decompress/%.S $(CC) -c $(CPPFLAGS) $(ASFLAGS) $< -o $@ zstdmt%.o : CPPFLAGS += $(MULTITHREAD_CPP) -zstdmt_m_%.o : $(ZSTDDIR)/common/%.c +zstdmt_m_%.o : $(LIB_SRCDIR)/common/%.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ -zstdmt_c_%.o : $(ZSTDDIR)/compress/%.c +zstdmt_c_%.o : $(LIB_SRCDIR)/compress/%.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ -zstdmt_d_%.o : $(ZSTDDIR)/decompress/%.c +zstdmt_d_%.o : $(LIB_SRCDIR)/decompress/%.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ -zstdmt_d_%.o : $(ZSTDDIR)/decompress/%.S +zstdmt_d_%.o : $(LIB_SRCDIR)/decompress/%.S $(CC) -c $(CPPFLAGS) $(ASFLAGS) $< -o $@ FULLBENCHS := fullbench fullbench32 @@ -146,12 +144,12 @@ $(FULLBENCHS) : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR CLEAN += fullbench-lib fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(ZSTDDIR)/libzstd.a fullbench.c +fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(LIB_SRCDIR)/libzstd.a fullbench.c $(LINK.c) $^ -o $@$(EXT) # note : broken : requires symbols unavailable from dynamic library fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/timefn.c fullbench.c -# $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll +# $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(LIB_SRCDIR)/dll/libzstd.dll $(LINK.c) $^ $(LDLIBS) -o $@$(EXT) CLEAN += fuzzer fuzzer32 @@ -165,7 +163,7 @@ fuzzer32 : $(ZSTD_FILES) $(LINK.c) $^ -o $@$(EXT) # note : broken : requires symbols unavailable from dynamic library -fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c +fuzzer-dll : 
$(LIB_SRCDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) CLEAN += zstreamtest zstreamtest32 @@ -196,7 +194,7 @@ zstreamtest_ubsan : $(ZSTREAMFILES) $(LINK.c) $(MULTITHREAD) $^ -o $@$(EXT) # note : broken : requires symbols unavailable from dynamic library -zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c # xxh symbols not exposed from dll +zstreamtest-dll : $(LIB_SRCDIR)/common/xxhash.c # xxh symbols not exposed from dll zstreamtest-dll : $(ZSTREAM_LOCAL_FILES) $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) @@ -224,15 +222,15 @@ CLEAN += invalidDictionaries invalidDictionaries : $(ZSTD_OBJECTS) invalidDictionaries.c CLEAN += legacy -legacy : CPPFLAGS += -I$(ZSTDDIR)/legacy -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=4 -legacy : $(ZSTD_FILES) $(sort $(wildcard $(ZSTDDIR)/legacy/*.c)) legacy.c +legacy : CPPFLAGS += -I$(LIB_SRCDIR)/legacy -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=4 +legacy : $(ZSTD_FILES) $(sort $(wildcard $(LIB_SRCDIR)/legacy/*.c)) legacy.c CLEAN += decodecorpus decodecorpus : LDLIBS += -lm decodecorpus : $(filter-out zstdc_zstd_compress.o, $(ZSTD_OBJECTS)) $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c decodecorpus.c CLEAN += poolTests -poolTests : $(PRGDIR)/util.c $(PRGDIR)/timefn.c poolTests.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c $(ZSTDDIR)/common/zstd_common.c $(ZSTDDIR)/common/error_private.c +poolTests : $(PRGDIR)/util.c $(PRGDIR)/timefn.c poolTests.c $(LIB_SRCDIR)/common/pool.c $(LIB_SRCDIR)/common/threading.c $(LIB_SRCDIR)/common/zstd_common.c $(LIB_SRCDIR)/common/error_private.c $(LINK.c) $(MULTITHREAD) $^ -o $@$(EXT) .PHONY: versionsTest @@ -245,11 +243,11 @@ automated_benchmarking: clean # make checkTag : check that release tag corresponds to release version CLEAN += checkTag -checkTag.o : $(ZSTDDIR)/zstd.h +checkTag.o : $(LIB_SRCDIR)/zstd.h .PHONY: clean clean: - $(MAKE) -C $(ZSTDDIR) clean + 
$(MAKE) -C $(LIB_SRCDIR) clean $(MAKE) -C $(PRGDIR) clean $(RM) -fR $(TESTARTEFACT) $(RM) -rf tmp* # some test directories are named tmp* diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 525e396bca4..cc6e15e313a 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -24,13 +24,12 @@ else endif CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/ -LIBZSTD = ../../lib +LIBZSTD_MK_DIR = ../../lib DEBUGLEVEL ?= 2 ZSTD_LEGACY_SUPPORT ?= 1 -include $(LIBZSTD)/libzstd.mk +include $(LIBZSTD_MK_DIR)/libzstd.mk -ZSTDDIR = ../../lib PRGDIR = ../../programs CONTRIBDIR = ../../contrib @@ -38,8 +37,8 @@ DEFAULT_SEQ_PROD_DIR = $(CONTRIBDIR)/externalSequenceProducer DEFAULT_SEQ_PROD_SRC = $(DEFAULT_SEQ_PROD_DIR)/sequence_producer.c THIRD_PARTY_SEQ_PROD_OBJ ?= -FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ - -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \ +FUZZ_CPPFLAGS := -I$(LIB_SRCDIR) -I$(LIB_SRCDIR)/common -I$(LIB_SRCDIR)/compress \ + -I$(LIB_SRCDIR)/dictBuilder -I$(LIB_SRCDIR)/deprecated -I$(LIB_SRCDIR)/legacy \ -I$(CONTRIBDIR)/seekable_format -I$(PRGDIR) -I$(DEFAULT_SEQ_PROD_DIR) \ -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS) FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ @@ -78,11 +77,11 @@ FUZZ_SRC := \ $(DEFAULT_SEQ_PROD_SRC) FUZZ_SRC := $(sort $(wildcard $(FUZZ_SRC))) -FUZZ_D_OBJ1 := $(subst $(ZSTDDIR)/common/,d_lib_common_,$(FUZZ_SRC)) -FUZZ_D_OBJ2 := $(subst $(ZSTDDIR)/compress/,d_lib_compress_,$(FUZZ_D_OBJ1)) -FUZZ_D_OBJ3 := $(subst $(ZSTDDIR)/decompress/,d_lib_decompress_,$(FUZZ_D_OBJ2)) -FUZZ_D_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,d_lib_dictBuilder_,$(FUZZ_D_OBJ3)) -FUZZ_D_OBJ5 := $(subst $(ZSTDDIR)/legacy/,d_lib_legacy_,$(FUZZ_D_OBJ4)) +FUZZ_D_OBJ1 := $(subst $(LIB_SRCDIR)/common/,d_lib_common_,$(FUZZ_SRC)) +FUZZ_D_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,d_lib_compress_,$(FUZZ_D_OBJ1)) +FUZZ_D_OBJ3 := $(subst 
$(LIB_SRCDIR)/decompress/,d_lib_decompress_,$(FUZZ_D_OBJ2)) +FUZZ_D_OBJ4 := $(subst $(LIB_SRCDIR)/dictBuilder/,d_lib_dictBuilder_,$(FUZZ_D_OBJ3)) +FUZZ_D_OBJ5 := $(subst $(LIB_SRCDIR)/legacy/,d_lib_legacy_,$(FUZZ_D_OBJ4)) FUZZ_D_OBJ6 := $(subst $(PRGDIR)/,d_prg_,$(FUZZ_D_OBJ5)) FUZZ_D_OBJ7 := $(subst $(DEFAULT_SEQ_PROD_DIR)/,d_default_seq_prod_,$(FUZZ_D_OBJ6)) FUZZ_D_OBJ8 := $(subst $\./,d_fuzz_,$(FUZZ_D_OBJ7)) @@ -90,11 +89,11 @@ FUZZ_D_OBJ9 := $(FUZZ_D_OBJ8:.c=.o) FUZZ_D_OBJ10 := $(THIRD_PARTY_SEQ_PROD_OBJ) $(FUZZ_D_OBJ9) FUZZ_DECOMPRESS_OBJ := $(FUZZ_D_OBJ10:.S=.o) -FUZZ_RT_OBJ1 := $(subst $(ZSTDDIR)/common/,rt_lib_common_,$(FUZZ_SRC)) -FUZZ_RT_OBJ2 := $(subst $(ZSTDDIR)/compress/,rt_lib_compress_,$(FUZZ_RT_OBJ1)) -FUZZ_RT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,rt_lib_decompress_,$(FUZZ_RT_OBJ2)) -FUZZ_RT_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,rt_lib_dictBuilder_,$(FUZZ_RT_OBJ3)) -FUZZ_RT_OBJ5 := $(subst $(ZSTDDIR)/legacy/,rt_lib_legacy_,$(FUZZ_RT_OBJ4)) +FUZZ_RT_OBJ1 := $(subst $(LIB_SRCDIR)/common/,rt_lib_common_,$(FUZZ_SRC)) +FUZZ_RT_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,rt_lib_compress_,$(FUZZ_RT_OBJ1)) +FUZZ_RT_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,rt_lib_decompress_,$(FUZZ_RT_OBJ2)) +FUZZ_RT_OBJ4 := $(subst $(LIB_SRCDIR)/dictBuilder/,rt_lib_dictBuilder_,$(FUZZ_RT_OBJ3)) +FUZZ_RT_OBJ5 := $(subst $(LIB_SRCDIR)/legacy/,rt_lib_legacy_,$(FUZZ_RT_OBJ4)) FUZZ_RT_OBJ6 := $(subst $(PRGDIR)/,rt_prg_,$(FUZZ_RT_OBJ5)) FUZZ_RT_OBJ7 := $(subst $(DEFAULT_SEQ_PROD_DIR)/,rt_default_seq_prod_,$(FUZZ_RT_OBJ6)) FUZZ_RT_OBJ8 := $(subst $\./,rt_fuzz_,$(FUZZ_RT_OBJ7)) @@ -129,22 +128,22 @@ FUZZ_TARGETS := \ all: libregression.a $(FUZZ_TARGETS) -rt_lib_common_%.o: $(ZSTDDIR)/common/%.c +rt_lib_common_%.o: $(LIB_SRCDIR)/common/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ -rt_lib_compress_%.o: $(ZSTDDIR)/compress/%.c +rt_lib_compress_%.o: $(LIB_SRCDIR)/compress/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ 
-rt_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c +rt_lib_decompress_%.o: $(LIB_SRCDIR)/decompress/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ -rt_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.S +rt_lib_decompress_%.o: $(LIB_SRCDIR)/decompress/%.S $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_ASFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ -rt_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c +rt_lib_dictBuilder_%.o: $(LIB_SRCDIR)/dictBuilder/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ -rt_lib_legacy_%.o: $(ZSTDDIR)/legacy/%.c +rt_lib_legacy_%.o: $(LIB_SRCDIR)/legacy/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ rt_prg_%.o: $(PRGDIR)/%.c @@ -156,22 +155,22 @@ rt_fuzz_%.o: %.c rt_default_seq_prod_%.o: $(DEFAULT_SEQ_PROD_DIR)/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@ -d_lib_common_%.o: $(ZSTDDIR)/common/%.c +d_lib_common_%.o: $(LIB_SRCDIR)/common/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ -d_lib_compress_%.o: $(ZSTDDIR)/compress/%.c +d_lib_compress_%.o: $(LIB_SRCDIR)/compress/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ -d_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c +d_lib_decompress_%.o: $(LIB_SRCDIR)/decompress/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ -d_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.S +d_lib_decompress_%.o: $(LIB_SRCDIR)/decompress/%.S $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_ASFLAGS) $< -c -o $@ -d_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c +d_lib_dictBuilder_%.o: $(LIB_SRCDIR)/dictBuilder/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ -d_lib_legacy_%.o: $(ZSTDDIR)/legacy/%.c +d_lib_legacy_%.o: $(LIB_SRCDIR)/legacy/%.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@ d_prg_%.o: $(PRGDIR)/%.c From 607933a2ff41f985ec9f05f2a0fc3b5b74f52b48 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 27 Aug 2023 21:33:32 -0700 Subject: [PATCH 088/283] minor simplification for dependency generation also : fix 
zstd-nomt exclusion and test --- lib/Makefile | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 9591cff0f1e..754c909609d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -202,15 +202,18 @@ lib : libzstd.a libzstd # Generate .h dependencies automatically -DEPFLAGS = -MT $@ -MMD -MP -MF +# -MMD: compiler generates dependency information as a side-effect of compilation, without system headers +# -MP: adds phony target for each dependency other than main file. +DEPFLAGS = -MMD -MP -$(ZSTD_DYNLIB_DIR)/%.o : %.c $(ZSTD_DYNLIB_DIR)/%.d | $(ZSTD_DYNLIB_DIR) +# ensure that ZSTD_DYNLIB_DIR exists prior to generating %.o +$(ZSTD_DYNLIB_DIR)/%.o : %.c | $(ZSTD_DYNLIB_DIR) @echo CC $@ - $(COMPILE.c) $(DEPFLAGS) $(ZSTD_DYNLIB_DIR)/$*.d $(OUTPUT_OPTION) $< + $(COMPILE.c) $(DEPFLAGS) $(OUTPUT_OPTION) $< -$(ZSTD_STATICLIB_DIR)/%.o : %.c $(ZSTD_STATICLIB_DIR)/%.d | $(ZSTD_STATICLIB_DIR) +$(ZSTD_STATICLIB_DIR)/%.o : %.c | $(ZSTD_STATICLIB_DIR) @echo CC $@ - $(COMPILE.c) $(DEPFLAGS) $(ZSTD_STATICLIB_DIR)/$*.d $(OUTPUT_OPTION) $< + $(COMPILE.c) $(DEPFLAGS) $(OUTPUT_OPTION) $< $(ZSTD_DYNLIB_DIR)/%.o : %.S | $(ZSTD_DYNLIB_DIR) @echo AS $@ @@ -220,24 +223,31 @@ $(ZSTD_STATICLIB_DIR)/%.o : %.S | $(ZSTD_STATICLIB_DIR) @echo AS $@ $(COMPILE.S) $(OUTPUT_OPTION) $< -MKDIR ?= mkdir +MKDIR ?= mkdir -p $(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATICLIB_DIR): - $(MKDIR) -p $@ + $(MKDIR) $@ DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATICLIB_OBJ:.o=.d) $(DEPFILES): -include $(wildcard $(DEPFILES)) +# The leading '-' means: do not fail if include fails (ex: directory does not exist yet) +-include $(wildcard $(DEPFILES)) -# Special case : building library in single-thread mode _and_ without zstdmt_compress.c -ZSTDMT_FILES = compress/zstdmt_compress.c -ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES)) +# Special case : build library in single-thread mode _and_ without zstdmt_compress.c +# Note : we still 
need threading.c and pool.c for the dictionary builder, +# but they will correctly behave single-threaded. +ZSTDMT_FILES = zstdmt_compress.c +ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(notdir $(ZSTD_FILES))) libzstd-nomt: CFLAGS += -fPIC -fvisibility=hidden libzstd-nomt: LDFLAGS += -shared libzstd-nomt: $(ZSTD_NOMT_FILES) @echo compiling single-thread dynamic library $(LIBVER) @echo files : $(ZSTD_NOMT_FILES) + @if echo "$(ZSTD_NOMT_FILES)" | tr ' ' '\n' | $(GREP) -q zstdmt; then \ + echo "Error: Found zstdmt in list."; \ + exit 1; \ + fi $(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ .PHONY: clean From 3fc14e411b18869e333732ceedad4f1052d73b86 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 13 Sep 2023 11:35:19 -0700 Subject: [PATCH 089/283] added some documentation on ZSTD_estimate*Size() variants as a follow up for #3747 --- lib/zstd.h | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index 148f112d184..c3cf056436b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1642,19 +1642,23 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); /*! ZSTD_estimate*() : * These functions make it possible to estimate memory usage * of a future {D,C}Ctx, before its creation. + * This is useful in combination with ZSTD_initStatic(), + * which makes it possible to employ a static buffer for ZSTD_CCtx* state. * * ZSTD_estimateCCtxSize() will provide a memory budget large enough - * for any compression level up to selected one. - * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate - * does not include space for a window buffer. - * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2() + * associated with any compression level up to max specified one. 
* The estimate will assume the input may be arbitrarily large, * which is the worst case. * + * Note that the size estimation is specific for one-shot compression, + * it is not valid for streaming (see ZSTD_estimateCStreamSize*()) + * nor other potential ways of using a ZSTD_CCtx* state. + * * When srcSize can be bound by a known and rather "small" value, - * this fact can be used to provide a tighter estimation - * because the CCtx compression context will need less memory. - * This tighter estimation can be provided by more advanced functions + * this knowledge can be used to provide a tighter budget estimation + * because the ZSTD_CCtx* state will need less memory for small inputs. + * This tighter estimation can be provided by employing more advanced functions * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. @@ -1665,33 +1669,35 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); * Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time. * Size estimates assume that no external sequence producer is registered. */ -ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); /*! ZSTD_estimateCStreamSize() : - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 
- * It will also consider src size to be arbitrarily "large", which is worst case. + * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression + * using any compression level up to the max specified one. + * It will also consider src size to be arbitrarily "large", which is a worst case scenario. * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. * Note : CStream size estimation is only correct for single-threaded compression. - * ZSTD_DStream memory budget depends on window Size. + * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. + * Size estimates assume that no external sequence producer is registered. + * + * ZSTD_DStream memory budget depends on frame's window Size. * This information can be passed manually, using ZSTD_estimateDStreamSize, * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Any frame requesting a window size larger than max specified one will be rejected. * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), * an internal ?Dict will be created, which additional size is not estimated here. * In this case, get total size by adding ZSTD_estimate?DictSize - * Note 2 : only single-threaded compression is supported. - * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. 
- * Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. - * Size estimates assume that no external sequence producer is registered. */ -ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize); ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); /*! ZSTD_estimate?DictSize() : From 48b5a7bd8bedcfcaf22631d45c61c2f544315053 Mon Sep 17 00:00:00 2001 From: Dominik Loidolt Date: Tue, 19 Sep 2023 16:22:47 +0200 Subject: [PATCH 090/283] Fix a very small formatting typo in the lib/README.md file --- lib/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/README.md b/lib/README.md index 47982093382..572b7df78a1 100644 --- a/lib/README.md +++ b/lib/README.md @@ -88,7 +88,7 @@ The file structure is designed to make this selection manually achievable for an For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` . - While invoking `make libzstd`, it's possible to define build macros - `ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`, + `ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`, and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features. This will also disable compilation of all dependencies (e.g. 
`ZSTD_LIB_COMPRESSION=0` will also disable From cdceb0fce59785c841bf697e00067163106064e1 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 22 Sep 2023 11:51:15 -0700 Subject: [PATCH 091/283] Improve macro guards for ZSTD_assertValidSequence Refine the macro guards to define the functions exactly when they are needed. This fixes the chromium build with zstd. Thanks to @GregTho for reporting! --- lib/decompress/zstd_decompress_block.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index c90536ad716..9e5c5144b7b 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1340,8 +1340,9 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c return seq; } -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) +#if DEBUGLEVEL >= 1 +static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) { size_t const windowSize = dctx->fParams.windowSize; /* No dictionary used. */ @@ -1355,8 +1356,9 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix /* Dictionary is active. 
*/ return 1; } +#endif -MEM_STATIC void ZSTD_assertValidSequence( +static void ZSTD_assertValidSequence( ZSTD_DCtx const* dctx, BYTE const* op, BYTE const* oend, seq_t const seq, From fe34776c207f3f879f386ed4158a38d927ff6d10 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Sat, 23 Sep 2023 18:56:01 +0200 Subject: [PATCH 092/283] Fix new typos found by codespell --- build/cmake/lib/CMakeLists.txt | 10 +++++----- doc/educational_decoder/zstd_decompress.c | 2 +- doc/zstd_manual.html | 22 +++++++++++----------- lib/common/pool.h | 2 +- lib/compress/huf_compress.c | 2 +- lib/compress/zstd_compress.c | 2 +- lib/compress/zstd_compress_internal.h | 4 ++-- lib/compress/zstd_cwksp.h | 8 ++++---- lib/decompress/zstd_decompress_block.c | 4 ++-- lib/zstd.h | 22 +++++++++++----------- tests/fuzz/fuzz_third_party_seq_prod.h | 4 ++-- tests/fuzzer.c | 8 ++++---- tests/zstreamtest.c | 8 ++++---- 13 files changed, 49 insertions(+), 49 deletions(-) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index cf1252e1964..2b64a07e2f8 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -17,11 +17,11 @@ option(ZSTD_BUILD_DECOMPRESSION "BUILD DECOMPRESSION MODULE" ON) option(ZSTD_BUILD_DICTBUILDER "BUILD DICTBUILDER MODULE" ON) option(ZSTD_BUILD_DEPRECATED "BUILD DEPRECATED MODULE" OFF) -set(ZSTDLIB_VISIBLE "" CACHE STRING "Visiblity for ZSTDLIB API") -set(ZSTDERRORLIB_VISIBLE "" CACHE STRING "Visiblity for ZSTDERRORLIB_VISIBLE API") -set(ZDICTLIB_VISIBLE "" CACHE STRING "Visiblity for ZDICTLIB_VISIBLE API") -set(ZSTDLIB_STATIC_API "" CACHE STRING "Visiblity for ZSTDLIB_STATIC_API API") -set(ZDICTLIB_STATIC_API "" CACHE STRING "Visiblity for ZDICTLIB_STATIC_API API") +set(ZSTDLIB_VISIBLE "" CACHE STRING "Visibility for ZSTDLIB API") +set(ZSTDERRORLIB_VISIBLE "" CACHE STRING "Visibility for ZSTDERRORLIB_VISIBLE API") +set(ZDICTLIB_VISIBLE "" CACHE STRING "Visibility for 
ZDICTLIB_VISIBLE API") +set(ZSTDLIB_STATIC_API "" CACHE STRING "Visibility for ZSTDLIB_STATIC_API API") +set(ZDICTLIB_STATIC_API "" CACHE STRING "Visibility for ZDICTLIB_STATIC_API API") set_property(CACHE ZSTDLIB_VISIBLE PROPERTY STRINGS "" "hidden" "default" "protected" "internal") set_property(CACHE ZSTDERRORLIB_VISIBLE PROPERTY STRINGS "" "hidden" "default" "protected" "internal") diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c index 921c8f54cd3..839e085b481 100644 --- a/doc/educational_decoder/zstd_decompress.c +++ b/doc/educational_decoder/zstd_decompress.c @@ -1213,7 +1213,7 @@ static void decode_seq_table(FSE_dtable *const table, istream_t *const in, break; } case seq_repeat: - // "Repeat_Mode : re-use distribution table from previous compressed + // "Repeat_Mode : reuse distribution table from previous compressed // block." // Nothing to do here, table will be unchanged if (!table->symbols) { diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index dcc10208dd3..d72eacc34cd 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -174,7 +174,7 @@

Helper functions

/* ZSTD_compressBound() :
 
 

Compression context

  When compressing many times,
   it is recommended to allocate a context just once,
-  and re-use it for each successive compression operation.
+  and reuse it for each successive compression operation.
   This will make workload friendlier for system's memory.
   Note : re-using context is just a speed / resource optimization.
          It doesn't change the compression ratio, which remains identical.
@@ -200,7 +200,7 @@ 

Compression context

  When compressing many times,
 
 

Decompression context

  When decompressing many times,
   it is recommended to allocate a context only once,
-  and re-use it for each successive compression operation.
+  and reuse it for each successive decompression operation.
   This will make workload friendlier for system's memory.
   Use one context per thread for parallel execution. 
 
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
@@ -568,14 +568,14 @@ 

Decompression context

  When decompressing many times,
   A ZSTD_CStream object is required to track streaming operation.
   Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
   ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
-  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+  It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
 
   For parallel execution, use one separate ZSTD_CStream per thread.
 
   note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
 
   Parameters are sticky : when starting a new compression on the same context,
-  it will re-use the same sticky parameters as previous compression session.
+  it will reuse the same sticky parameters as previous compression session.
   When in doubt, it's recommended to fully initialize the context before usage.
   Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
   ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
@@ -698,7 +698,7 @@ 

Streaming compression functions

typedef enum {
 

Streaming decompression - HowTo

   A ZSTD_DStream object is required to track streaming operations.
   Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
-  ZSTD_DStream objects can be re-used multiple times.
+  ZSTD_DStream objects can be reused multiple times.
 
   Use ZSTD_initDStream() to start a new decompression operation.
  @return : recommended first input size
@@ -869,7 +869,7 @@ 

Streaming decompression functions


Advanced dictionary and prefix API (Requires v1.4.0+)

  This API allows dictionaries to be used with ZSTD_compress2(),
  ZSTD_compressStream2(), and ZSTD_decompressDCtx().
- Dictionaries are sticky, they remain valid when same context is re-used,
+ Dictionaries are sticky, they remain valid when same context is reused,
  they only reset when the context is reset
  with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
  In contrast, Prefixes are single-use.
@@ -1857,7 +1857,7 @@ 

Advanced Streaming compression functions

0, its value must be correct, as it will be written in header, and controlled at the end. @@ -1918,7 +1918,7 @@

Advanced Streaming decompression functions

ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - re-use decompression parameters from previous init; saves dictionary loading + reuse decompression parameters from previous init; saves dictionary loading


@@ -1964,7 +1964,7 @@

Advanced Streaming decompression functions

Buffer-less streaming compression (synchronous mode)

   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
-  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+  ZSTD_CCtx object can be reused multiple times within successive compression operations.
 
   Start by initializing a context.
   Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
@@ -1985,7 +1985,7 @@ 

Advanced Streaming decompression functions

It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. + `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.

Buffer-less streaming compression functions

ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
@@ -2002,7 +2002,7 @@ 

Buffer-less streaming compression functions

ZSTD_DEPR
 

Buffer-less streaming decompression (synchronous mode)

   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
-  A ZSTD_DCtx object can be re-used multiple times.
+  A ZSTD_DCtx object can be reused multiple times.
 
   First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
   Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
diff --git a/lib/common/pool.h b/lib/common/pool.h
index eb22ff509f5..cca4de73a83 100644
--- a/lib/common/pool.h
+++ b/lib/common/pool.h
@@ -47,7 +47,7 @@ void POOL_joinJobs(POOL_ctx* ctx);
 /*! POOL_resize() :
  *  Expands or shrinks pool's number of threads.
  *  This is more efficient than releasing + creating a new context,
- *  since it tries to preserve and re-use existing threads.
+ *  since it tries to preserve and reuse existing threads.
  * `numThreads` must be at least 1.
  * @return : 0 when resize was successful,
  *           !0 (typically 1) if there is an error.
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 3fe25789603..1097d13d87b 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -1447,7 +1447,7 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
 /* HUF_compress4X_repeat():
  * compress input using 4 streams.
  * consider skipping quickly
- * re-use an existing huffman compression table */
+ * reuse an existing huffman compression table */
 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 36c1f99e800..a51c0079258 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2606,7 +2606,7 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
     assert(size < (1U<<31));   /* can be casted to int */
 
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the table re-use logic is sound, and that we don't
+    /* To validate that the table reuse logic is sound, and that we don't
      * access table space that we haven't cleaned, we re-"poison" the table
      * space every time we mark it dirty.
      *
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index ac8dfb71a41..ce4242c8d11 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -39,7 +39,7 @@ extern "C" {
                                        It's not a big deal though : candidate will just be sorted again.
                                        Additionally, candidate position 1 will be lost.
                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
                                        This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
 
 
@@ -228,7 +228,7 @@ struct ZSTD_matchState_t {
     U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
     BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
     U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
-    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for re-use of tag table */
+    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for reuse of tag table */
     U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */
 
     U32* hashTable;
diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h
index a3efc56e523..3eddbd334e8 100644
--- a/lib/compress/zstd_cwksp.h
+++ b/lib/compress/zstd_cwksp.h
@@ -434,7 +434,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
 
 /**
  * Aligned on 64 bytes. These buffers have the special property that
- * their values remain constrained, allowing us to re-use them without
+ * their values remain constrained, allowing us to reuse them without
  * memset()-ing them.
  */
 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
@@ -526,7 +526,7 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
     DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
 
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the table re-use logic is sound, and that we don't
+    /* To validate that the table reuse logic is sound, and that we don't
      * access table space that we haven't cleaned, we re-"poison" the table
      * space every time we mark it dirty.
      * Since tableValidEnd space and initOnce space may overlap we don't poison
@@ -603,9 +603,9 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
     DEBUGLOG(4, "cwksp: clearing!");
 
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the context re-use logic is sound, and that we don't
+    /* To validate that the context reuse logic is sound, and that we don't
      * access stuff that this compression hasn't initialized, we re-"poison"
-     * the workspace except for the areas in which we expect memory re-use
+     * the workspace except for the areas in which we expect memory reuse
      * without initialization (objects, valid tables area and init once
      * memory). */
     {
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index 9e5c5144b7b..aa506be2039 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -1431,7 +1431,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                 BIT_DStream_completed < BIT_DStream_overflow);
 
         /* decompress without overrunning litPtr begins */
-        {   seq_t sequence = {0,0,0};  /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one interation) */
+        {   seq_t sequence = {0,0,0};  /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */
             /* Align the decompression loop to 32 + 16 bytes.
                 *
                 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
@@ -1591,7 +1591,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
         litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
         dctx->litBufferLocation = ZSTD_not_in_dst;
     }
-    /* copy last literals from interal buffer */
+    /* copy last literals from internal buffer */
     {   size_t const lastLLSize = litBufferEnd - litPtr;
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
diff --git a/lib/zstd.h b/lib/zstd.h
index c3cf056436b..c33dab3cd9f 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -249,7 +249,7 @@ ZSTDLIB_API int         ZSTD_defaultCLevel(void);           /*!< default compres
 /*= Compression context
  *  When compressing many times,
  *  it is recommended to allocate a context just once,
- *  and re-use it for each successive compression operation.
+ *  and reuse it for each successive compression operation.
  *  This will make workload friendlier for system's memory.
  *  Note : re-using context is just a speed / resource optimization.
  *         It doesn't change the compression ratio, which remains identical.
@@ -276,7 +276,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
 /*= Decompression context
  *  When decompressing many times,
  *  it is recommended to allocate a context only once,
- *  and re-use it for each successive compression operation.
+ *  and reuse it for each successive compression operation.
  *  This will make workload friendlier for system's memory.
  *  Use one context per thread for parallel execution. */
 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
@@ -682,14 +682,14 @@ typedef struct ZSTD_outBuffer_s {
 *  A ZSTD_CStream object is required to track streaming operation.
 *  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
 *  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
-*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+*  It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
 *
 *  For parallel execution, use one separate ZSTD_CStream per thread.
 *
 *  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
 *
 *  Parameters are sticky : when starting a new compression on the same context,
-*  it will re-use the same sticky parameters as previous compression session.
+*  it will reuse the same sticky parameters as previous compression session.
 *  When in doubt, it's recommended to fully initialize the context before usage.
 *  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
 *  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
@@ -837,7 +837,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 *
 *  A ZSTD_DStream object is required to track streaming operations.
 *  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
-*  ZSTD_DStream objects can be re-used multiple times.
+*  ZSTD_DStream objects can be reused multiple times.
 *
 *  Use ZSTD_initDStream() to start a new decompression operation.
 * @return : recommended first input size
@@ -1023,7 +1023,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
  *
  * This API allows dictionaries to be used with ZSTD_compress2(),
  * ZSTD_compressStream2(), and ZSTD_decompressDCtx().
- * Dictionaries are sticky, they remain valid when same context is re-used,
+ * Dictionaries are sticky, they remain valid when same context is reused,
  * they only reset when the context is reset
  * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
  * In contrast, Prefixes are single-use.
@@ -2581,7 +2581,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
  *       explicitly specified.
  *
  *  start a new frame, using same parameters from previous frame.
- *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ *  This is typically useful to skip dictionary loading stage, since it will reuse it in-place.
  *  Note that zcs must be init at least once before using ZSTD_resetCStream().
  *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
  *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
@@ -2657,7 +2657,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const Z
  *
  *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
  *
- * re-use decompression parameters from previous init; saves dictionary loading
+ * reuse decompression parameters from previous init; saves dictionary loading
  */
 ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
 ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
@@ -2844,7 +2844,7 @@ ZSTD_registerSequenceProducer(
 
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
-  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+  ZSTD_CCtx object can be reused multiple times within successive compression operations.
 
   Start by initializing a context.
   Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
@@ -2865,7 +2865,7 @@ ZSTD_registerSequenceProducer(
   It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
   Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
 
-  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+  `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
 */
 
 /*=====   Buffer-less streaming compression functions  =====*/
@@ -2897,7 +2897,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
 
   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
-  A ZSTD_DCtx object can be re-used multiple times.
+  A ZSTD_DCtx object can be reused multiple times.
 
   First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
   Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
diff --git a/tests/fuzz/fuzz_third_party_seq_prod.h b/tests/fuzz/fuzz_third_party_seq_prod.h
index f04ad31ad52..f0771e47be4 100644
--- a/tests/fuzz/fuzz_third_party_seq_prod.h
+++ b/tests/fuzz/fuzz_third_party_seq_prod.h
@@ -52,7 +52,7 @@ extern "C" {
 size_t FUZZ_seqProdSetup(void);
 
 /* The fuzzer will call this function after each test-case. It should free
- * resources aquired by FUZZ_seqProdSetup() to prevent leaks across test-cases.
+ * resources acquired by FUZZ_seqProdSetup() to prevent leaks across test-cases.
  *
  * The fuzzer will assert() that the return value is zero. To signal an error,
  * please return a non-zero value. */
@@ -72,7 +72,7 @@ size_t FUZZ_seqProdTearDown(void);
 void* FUZZ_createSeqProdState(void);
 
 /* The fuzzer will call this function after each test-case. It should free any
- * resources aquired by FUZZ_createSeqProdState().
+ * resources acquired by FUZZ_createSeqProdState().
  *
  * The fuzzer will assert() that the return value is zero. To signal an error,
  * please return a non-zero value. */
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 4a68ff73168..eaf7ee5007b 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -376,7 +376,7 @@ static int threadPoolTests(void) {
 
     RDG_genBuffer(CNBuffer, CNBuffSize, 0.5, 0.5, 0);
 
-    DISPLAYLEVEL(3, "thread pool test : threadPool re-use roundtrips: ");
+    DISPLAYLEVEL(3, "thread pool test : threadPool reuse roundtrips: ");
     {
         ZSTD_CCtx* cctx = ZSTD_createCCtx();
         ZSTD_threadPool* pool = ZSTD_createThreadPool(kPoolNumThreads);
@@ -1525,14 +1525,14 @@ static int basicUnitTests(U32 const seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
-    DISPLAYLEVEL(3, "test%3d : re-use CCtx with expanding block size : ", testNb++);
+    DISPLAYLEVEL(3, "test%3d : reuse CCtx with expanding block size : ", testNb++);
     {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         ZSTD_parameters const params = ZSTD_getParams(1, ZSTD_CONTENTSIZE_UNKNOWN, 0);
         assert(params.fParams.contentSizeFlag == 1);  /* block size will be adapted if pledgedSrcSize is enabled */
         CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, 1 /*pledgedSrcSize*/) );
         CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, 1) ); /* creates a block size of 1 */
 
-        CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) );  /* re-use same parameters */
+        CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) );  /* reuse same parameters */
         {   size_t const inSize = 2* 128 KB;
             size_t const outSize = ZSTD_compressBound(inSize);
             CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, outSize, CNBuffer, inSize) );
@@ -1827,7 +1827,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
         params.cParams.windowLog = ZSTD_WINDOWLOG_MAX;
         for (cnb = 0; cnb < nbCompressions; ++cnb) {
             DISPLAYLEVEL(6, "run %zu / %zu \n", cnb, nbCompressions);
-            CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) );  /* re-use same parameters */
+            CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) );  /* reuse same parameters */
             CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize) );
         }
         ZSTD_freeCCtx(cctx);
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 85d0fc81fe0..04f1f8b0e9c 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -408,8 +408,8 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
     if (inBuff.pos != inBuff.size) goto _output_error;   /* should have read the entire frame */
     DISPLAYLEVEL(3, "OK \n");
 
-    /* Re-use without init */
-    DISPLAYLEVEL(3, "test%3i : decompress again without init (re-use previous settings): ", testNb++);
+    /* Reuse without init */
+    DISPLAYLEVEL(3, "test%3i : decompress again without init (reuse previous settings): ", testNb++);
     outBuff.pos = 0;
     { size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff2);
       if (remaining != 0) goto _output_error; }  /* should reach end of frame == 0; otherwise, some data left, or an error */
@@ -653,8 +653,8 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
             DISPLAYLEVEL(3, "OK (error detected : %s) \n", ZSTD_getErrorName(r));
     }   }
 
-    /* Compression state re-use scenario */
-    DISPLAYLEVEL(3, "test%3i : context re-use : ", testNb++);
+    /* Compression state reuse scenario */
+    DISPLAYLEVEL(3, "test%3i : context reuse : ", testNb++);
     ZSTD_freeCStream(zc);
     zc = ZSTD_createCStream();
     if (zc==NULL) goto _output_error;   /* memory allocation issue */

From 585aaa0ed324a858226908fc1f00d78ed92b0f4b Mon Sep 17 00:00:00 2001
From: Dimitri Papadopoulos
 <3234522+DimitriPapadopoulos@users.noreply.github.com>
Date: Sat, 23 Sep 2023 19:03:18 +0200
Subject: [PATCH 093/283] Do not test WIN32, instead test _WIN32

To the best of my knowledge:
* `_WIN32` and `_WIN64` are defined by the compiler,
* `WIN32` and `WIN64` are defined by the user, to indicate whatever
  the user chooses them to indicate. They mean 32-bit and 64-bit Windows
  compilation by convention only.

See:
https://accu.org/journals/overload/24/132/wilson_2223/

Windows compilers in general, and MSVC in particular, have been defining
`_WIN32` and `_WIN64` for a long time, provably at least since Visual Studio
2015, and in practice as early as in the days of 16-bit Windows.

See:
https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=msvc-140
https://learn.microsoft.com/en-us/windows/win32/winprog64/the-tools

Tests used to be inconsistent, sometimes testing `_WIN32`, sometimes
`_WIN32` and `WIN32`. This brings consistency to Windows detection.
---
 contrib/seekable_format/examples/parallel_processing.c | 2 +-
 programs/fileio.c                                      | 2 +-
 programs/platform.h                                    | 4 ++--
 programs/util.h                                        | 2 +-
 zlibWrapper/examples/minigzip.c                        | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/contrib/seekable_format/examples/parallel_processing.c b/contrib/seekable_format/examples/parallel_processing.c
index 356561e5a60..928371025c9 100644
--- a/contrib/seekable_format/examples/parallel_processing.c
+++ b/contrib/seekable_format/examples/parallel_processing.c
@@ -19,7 +19,7 @@
 #define ZSTD_STATIC_LINKING_ONLY
 #include       // presumes zstd library is installed
 #include 
-#if defined(WIN32) || defined(_WIN32)
+#if defined(_WIN32)
 #  include 
 #  define SLEEP(x) Sleep(x)
 #else
diff --git a/programs/fileio.c b/programs/fileio.c
index 2172325423e..81d343023e3 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -527,7 +527,7 @@ static int FIO_removeFile(const char* path)
         DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
         return 0;
     }
-#if defined(_WIN32) || defined(WIN32)
+#if defined(_WIN32)
     /* windows doesn't allow remove read-only files,
      * so try to make it writable first */
     if (!(statbuf.st_mode & _S_IWRITE)) {
diff --git a/programs/platform.h b/programs/platform.h
index 43c5dc9694d..bbe0965ae76 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -141,7 +141,7 @@ extern "C" {
 #elif defined(MSDOS) || defined(OS2)
 #  include        /* _isatty */
 #  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
-#elif defined(WIN32) || defined(_WIN32)
+#elif defined(_WIN32)
 #  include       /* _isatty */
 #  include  /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
 #  include    /* FILE */
@@ -157,7 +157,7 @@ static __inline int IS_CONSOLE(FILE* stdStream) {
 /******************************
 *  OS-specific IO behaviors
 ******************************/
-#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
+#if defined(MSDOS) || defined(OS2) || defined(_WIN32)
 #  include    /* _O_BINARY */
 #  include       /* _setmode, _fileno, _get_osfhandle */
 #  if !defined(__DJGPP__)
diff --git a/programs/util.h b/programs/util.h
index 8234646bf3d..571d3942198 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -338,7 +338,7 @@ void UTIL_refFilename(FileNamesTable* fnt, const char* filename);
 FileNamesTable*
 UTIL_createExpandedFNT(const char* const* filenames, size_t nbFilenames, int followLinks);
 
-#if defined(_WIN32) || defined(WIN32)
+#if defined(_WIN32)
 DWORD CountSetBits(ULONG_PTR bitMask);
 #endif
 
diff --git a/zlibWrapper/examples/minigzip.c b/zlibWrapper/examples/minigzip.c
index 717a94df970..67a17907b47 100644
--- a/zlibWrapper/examples/minigzip.c
+++ b/zlibWrapper/examples/minigzip.c
@@ -34,7 +34,7 @@
 #  include 
 #endif
 
-#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+#if defined(MSDOS) || defined(OS2) || defined(_WIN32) || defined(__CYGWIN__)
 #  include 
 #  include 
 #  ifdef UNDER_CE
@@ -63,7 +63,7 @@
 #endif
 
 #if !defined(Z_HAVE_UNISTD_H) && !defined(_LARGEFILE64_SOURCE)
-#ifndef WIN32 /* unlink already in stdio.h for WIN32 */
+#ifndef _WIN32 /* unlink already in stdio.h for WIN32 */
   extern int unlink _Z_OF((const char *));
 #endif
 #endif

From d5cbae7c50835b84114efd3c80ff8bbe99080fe8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 25 Sep 2023 05:18:07 +0000
Subject: [PATCH 094/283] Bump actions/checkout from 4.0.0 to 4.1.0

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.0.0 to 4.1.0.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/3df4ab11eba7bda6032a0b82a6bb43b11571feac...8ade135a41bc03ea155e62e844d188df1ea18608)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/dev-long-tests.yml          | 50 ++++++-------
 .github/workflows/dev-short-tests.yml         | 74 +++++++++----------
 .../workflows/publish-release-artifacts.yml   |  2 +-
 .github/workflows/scorecards.yml              |  2 +-
 .github/workflows/windows-artifacts.yml       |  2 +-
 5 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index e2081ee46a7..2f2793bcd05 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -15,7 +15,7 @@ jobs:
   make-all:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: make all
       run: make all
 
@@ -26,7 +26,7 @@ jobs:
       DEVNULLRIGHTS: 1
       READFROMBLOCKDEVICE: 1
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: make test
       run: make test
 
@@ -34,7 +34,7 @@ jobs:
   make-test-osx:
     runs-on: macos-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: OS-X test
       run: make test # make -c lib all doesn't work because of the fact that it's not a tty
 
@@ -45,7 +45,7 @@ jobs:
       DEVNULLRIGHTS: 1
       READFROMBLOCKDEVICE: 1
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: make test
       run: |
         sudo apt-get -qqq update
@@ -55,21 +55,21 @@ jobs:
   no-intrinsics-fuzztest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: no intrinsics fuzztest
       run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest
 
   tsan-zstreamtest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: thread sanitizer zstreamtest
       run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream
 
   ubsan-zstreamtest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: undefined behavior sanitizer zstreamtest
       run: CC=clang make uasan-test-zstream
 
@@ -77,7 +77,7 @@ jobs:
   tsan-fuzztest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: thread sanitizer fuzztest
       run: CC=clang make tsan-fuzztest
 
@@ -85,7 +85,7 @@ jobs:
   big-tests-zstreamtest32:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: zstream tests in 32bit mode, with big tests
       run: |
         sudo apt-get -qqq update
@@ -96,7 +96,7 @@ jobs:
   gcc-8-asan-ubsan-testzstd:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: gcc-8 + ASan + UBSan + Test Zstd
       # See https://askubuntu.com/a/1428822
       run: |
@@ -108,14 +108,14 @@ jobs:
   clang-asan-ubsan-testzstd:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # tag=v3
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: clang + ASan + UBSan + Test Zstd
       run: CC=clang make -j uasan-test-zstd 
Date: Wed, 27 Sep 2023 21:18:20 -0700
Subject: [PATCH 095/283] fix x32 tests on Github CI

ubuntu-22.04 seems to have had problems with x32 recently;
switching to ubuntu-20.04, which seems to work fine so far.

https://github.com/actions/runner-images/issues/8397
---
 .github/workflows/dev-short-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index fd0ed79db1a..2da03e98a39 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -38,7 +38,7 @@ jobs:
         CFLAGS="-m32 -O1 -fstack-protector" make check V=1
 
   check-x32:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04  # ubuntu-latest == ubuntu-22.04 have issues currently with x32
     steps:
     - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
     - name: make check on x32 ABI # https://en.wikipedia.org/wiki/X32_ABI

From 3daed7017af2f015dc34e88ff4ac1cac8cd7e511 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Fri, 25 Aug 2023 11:18:56 -0700
Subject: [PATCH 096/283] Revert "Work around nullptr-with-nonzero-offset
 warning"

This reverts commit c27fa399042f466080e79bb4fd8a4871bc0bcf28.
---
 lib/decompress/zstd_decompress.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index ccfd84fa42d..94eb95151b5 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1548,12 +1548,6 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
     dctx->stage = ZSTDds_getFrameHeaderSize;
     dctx->processedCSize = 0;
     dctx->decodedSize = 0;
-    /* Set to non-null because ZSTD_prefetchMatch() may end up doing addition
-     * with this value for corrupted frames. However, it then just passes the
-     * pointer to PREFETCH_L1(), which doesn't require valid pointers. But,
-     * if it is NULL we get nullptr-with-nonzero-offset UBSAN warnings.
-     */
-    dctx->previousDstEnd = "";
     dctx->previousDstEnd = NULL;
     dctx->prefixStart = NULL;
     dctx->virtualStart = NULL;

From 43118da8a7fb51e660bfa7e958639c5cc8285580 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Tue, 26 Sep 2023 17:53:26 -0700
Subject: [PATCH 097/283] Stop suppressing pointer-overflow UBSAN errors

* Remove all pointer-overflow suppressions from our UBSAN builds/tests.
* Add `ZSTD_ALLOW_POINTER_OVERFLOW_ATTR` macro to suppress
  pointer-overflow at a per-function level. This is a superior approach
  because it also applies to users who build zstd with UBSAN.
* Add `ZSTD_wrappedPtr{Diff,Add,Sub}()` that use these suppressions.
  The end goal is to only tag these functions with
  `ZSTD_ALLOW_POINTER_OVERFLOW_ATTR`. But we can start by annotating functions
  that rely on pointer overflow, and gradually transition to using
  these.
* Add `ZSTD_maybeNullPtrAdd()` to simplify pointer addition when the
  pointer may be `NULL`.
* Fix all the fuzzer issues that came up. I'm sure there will be a lot
  more, but these are the ones that came up within a few minutes of
  running the fuzzers, and while running GitHub CI.
---
 Makefile                               |  6 +--
 lib/common/compiler.h                  | 70 ++++++++++++++++++++++++++
 lib/compress/zstd_compress_internal.h  |  8 ++-
 lib/compress/zstd_double_fast.c        | 14 ++++--
 lib/compress/zstd_fast.c               | 18 +++++--
 lib/compress/zstd_lazy.c               | 63 +++++++++++++++--------
 lib/compress/zstd_ldm.c                |  4 +-
 lib/compress/zstd_opt.c                | 36 ++++++++-----
 lib/decompress/huf_decompress.c        | 16 +++---
 lib/decompress/zstd_decompress.c       |  4 +-
 lib/decompress/zstd_decompress_block.c | 24 ++++++---
 lib/legacy/zstd_legacy.h               | 30 +++++++++++
 lib/legacy/zstd_v01.c                  |  2 +
 lib/legacy/zstd_v02.c                  | 16 +-----
 lib/legacy/zstd_v03.c                  | 16 +-----
 lib/legacy/zstd_v04.c                  |  6 ++-
 lib/legacy/zstd_v05.c                  |  1 +
 lib/legacy/zstd_v06.c                  |  2 +
 lib/legacy/zstd_v07.c                  |  3 +-
 tests/decodecorpus.c                   |  2 +-
 tests/fuzz/fuzz.py                     |  8 ++-
 tests/fuzz/sequence_compression_api.c  |  2 +-
 tests/fuzzer.c                         |  4 +-
 23 files changed, 252 insertions(+), 103 deletions(-)

diff --git a/Makefile b/Makefile
index fd95c38901d..7bc7ec5bf91 100644
--- a/Makefile
+++ b/Makefile
@@ -317,7 +317,7 @@ update_regressionResults:
 # run UBsan with -fsanitize-recover=pointer-overflow
 # this only works with recent compilers such as gcc 8+
 usan: clean
-	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=undefined -Werror $(MOREFLAGS)"
+	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=undefined -Werror $(MOREFLAGS)"
 
 asan: clean
 	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -Werror $(MOREFLAGS)"
@@ -335,10 +335,10 @@ asan32: clean
 	$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address $(MOREFLAGS)"
 
 uasan: clean
-	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror $(MOREFLAGS)"
+	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address,undefined -Werror $(MOREFLAGS)"
 
 uasan-%: clean
-	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $*
+	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address,undefined -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $*
 
 tsan-%: clean
 	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=thread -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $* FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)"
diff --git a/lib/common/compiler.h b/lib/common/compiler.h
index bcaa575dda6..df39d91c6e0 100644
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -11,6 +11,8 @@
 #ifndef ZSTD_COMPILER_H
 #define ZSTD_COMPILER_H
 
+#include <stddef.h>
+
 #include "portability_macros.h"
 
 /*-*******************************************************
@@ -302,6 +304,74 @@
 *  Sanitizer
 *****************************************************************/
 
+/**
+ * Zstd relies on pointer overflow in its decompressor; functions that rely on it are tagged with this attribute.
+ * A definition supplied before this header wins; without no_sanitize support the macro expands to nothing.
+ */
+#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+#  if __has_attribute(no_sanitize)
+#    if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
+       /* gcc < 8 only has signed-integer-overflow, which triggers on pointer overflow */
+#      define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
+#    else
+       /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
+#      define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
+#    endif
+#  else
+#    define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+#  endif
+#endif
+
+/**
+ * Helper function to perform a wrapped pointer difference without triggering
+ * UBSAN (the function is tagged with ZSTD_ALLOW_POINTER_OVERFLOW_ATTR).
+ *
+ * @returns lhs - rhs with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
+{
+    return lhs - rhs;
+}
+
+/**
+ * Helper function to perform a wrapped pointer add without triggering UBSAN (tagged with ZSTD_ALLOW_POINTER_OVERFLOW_ATTR).
+ *
+ * @returns ptr + add with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
+{
+    return ptr + add;
+}
+
+/**
+ * Helper function to perform a wrapped pointer subtraction without triggering
+ * UBSAN (the function is tagged with ZSTD_ALLOW_POINTER_OVERFLOW_ATTR).
+ *
+ * @returns ptr - sub with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
+{
+    return ptr - sub;
+}
+
+/**
+ * Helper function to add to a pointer that works around C's undefined behavior
+ * of adding 0 to NULL. Note that no addition is performed for any `add <= 0`.
+ *
+ * @returns `ptr + add` when `add > 0`, otherwise `ptr` unchanged (so `NULL + 0 == NULL`).
+ */
+MEM_STATIC
+unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
+{
+    return add > 0 ? ptr + add : ptr;
+}
+
 /* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
  * abundance of caution, disable our custom poisoning on mingw. */
 #ifdef __MINGW32__
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index ac8dfb71a41..b7c5c3c1acb 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -1053,7 +1053,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
  * The least significant cycleLog bits of the indices must remain the same,
  * which may be 0. Every index up to maxDist in the past must be valid.
  */
-MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                            U32 maxDist, void const* src)
 {
     /* preemptive overflow correction:
@@ -1246,7 +1248,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
  * forget about the extDict. Handles overlap of the prefix and extDict.
  * Returns non-zero if the segment is contiguous.
  */
-MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_update(ZSTD_window_t* window,
                                   void const* src, size_t srcSize,
                                   int forceNonContiguous)
 {
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index d4544b39051..aaa6f3d3d20 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -13,7 +13,9 @@
 
 #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
 
-static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -49,7 +51,9 @@ static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
     }   }
 }
 
-static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -97,6 +101,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
 
 
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_doubleFast_noDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls /* template */)
@@ -307,6 +312,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
 
 
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
@@ -591,7 +597,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
 }
 
 
-static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_doubleFast_extDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
         U32 const mls /* template */)
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index 5f2c6a2edad..fb1ef60c1f8 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -11,7 +11,9 @@
 #include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
 #include "zstd_fast.h"
 
-static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -46,7 +48,9 @@ static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
                 }   }   }   }
 }
 
-static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -139,8 +143,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
  *
  * This is also the work we do at the beginning to enter the loop initially.
  */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_fast_noDict_generic(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_fast_noDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
         U32 const mls, U32 const hasStep)
@@ -456,6 +461,7 @@ size_t ZSTD_compressBlock_fast(
 }
 
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
@@ -681,7 +687,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
 }
 
 
-static size_t ZSTD_compressBlock_fast_extDict_generic(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_fast_extDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 834520fd763..3aba83c6fc3 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -24,8 +24,9 @@
 *  Binary Tree search
 ***************************************/
 
-static void
-ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
                 const BYTE* ip, const BYTE* iend,
                 U32 mls)
 {
@@ -68,8 +69,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
  *  sort one already inserted but unsorted position
  *  assumption : curr >= btlow == (curr - btmask)
  *  doesn't fail */
-static void
-ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
                  U32 curr, const BYTE* inputEnd,
                  U32 nbCompares, U32 btLow,
                  const ZSTD_dictMode_e dictMode)
@@ -157,8 +159,9 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
 }
 
 
-static size_t
-ZSTD_DUBT_findBetterDictMatch (
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBetterDictMatch (
         const ZSTD_matchState_t* ms,
         const BYTE* const ip, const BYTE* const iend,
         size_t* offsetPtr,
@@ -235,8 +238,9 @@ ZSTD_DUBT_findBetterDictMatch (
 }
 
 
-static size_t
-ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
                         const BYTE* const ip, const BYTE* const iend,
                         size_t* offBasePtr,
                         U32 const mls,
@@ -386,8 +390,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
 
 
 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iLimit,
                       size_t* offBasePtr,
                 const U32 mls /* template */,
@@ -622,7 +627,9 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
 
 /* Update chains up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndex_internal(
                         ZSTD_matchState_t* ms,
                         const ZSTD_compressionParameters* const cParams,
                         const BYTE* ip, U32 const mls, U32 const lazySkipping)
@@ -656,6 +663,7 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
 
 /* inlining is important to hardwire a hot branch (template emulation) */
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_HcFindBestMatch(
                         ZSTD_matchState_t* ms,
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -824,7 +832,9 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* t
  * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
  * but not beyond iLimit.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
                                    U32 const rowLog, U32 const mls,
                                    U32 idx, const BYTE* const iLimit)
 {
@@ -850,7 +860,9 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
  * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
  */
-FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
                                                   BYTE const* tagTable, BYTE const* base,
                                                   U32 idx, U32 const hashLog,
                                                   U32 const rowLog, U32 const mls,
@@ -868,10 +880,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
 /* ZSTD_row_update_internalImpl():
  * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
-                                                        U32 updateStartIdx, U32 const updateEndIdx,
-                                                        U32 const mls, U32 const rowLog,
-                                                        U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
+                                  U32 updateStartIdx, U32 const updateEndIdx,
+                                  U32 const mls, U32 const rowLog,
+                                  U32 const rowMask, U32 const useCache)
 {
     U32* const hashTable = ms->hashTable;
     BYTE* const tagTable = ms->tagTable;
@@ -897,9 +911,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
  * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
  * Skips sections of long matches as is necessary.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
-                                                    U32 const mls, U32 const rowLog,
-                                                    U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+                              U32 const mls, U32 const rowLog,
+                              U32 const rowMask, U32 const useCache)
 {
     U32 idx = ms->nextToUpdate;
     const BYTE* const base = ms->window.base;
@@ -1121,6 +1137,7 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGr
  * - Pick the longest match.
  */
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_RowFindBestMatch(
                         ZSTD_matchState_t* ms,
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -1494,8 +1511,9 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
 *  Common parser - lazy strategy
 *********************************/
 
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_lazy_generic(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_lazy_generic(
                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
                         U32 rep[ZSTD_REP_NUM],
                         const void* src, size_t srcSize,
@@ -1915,6 +1933,7 @@ size_t ZSTD_compressBlock_btlazy2_dictMatchState(
  || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
  || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_lazy_extDict_generic(
                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
                         U32 rep[ZSTD_REP_NUM],
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 01c1f75aae4..7a0792ee458 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -322,7 +322,9 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
     }
 }
 
-static size_t ZSTD_ldm_generateSequences_internal(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_ldm_generateSequences_internal(
         ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
         ldmParams_t const* params, void const* src, size_t srcSize)
 {
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 3d54e21aef8..11d460c60c7 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -405,9 +405,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
 
 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
-                                              U32* nextToUpdate3,
-                                              const BYTE* const ip)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
+                                       U32* nextToUpdate3,
+                                       const BYTE* const ip)
 {
     U32* const hashTable3 = ms->hashTable3;
     U32 const hashLog3 = ms->hashLog3;
@@ -434,7 +436,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
  * @param ip assumed <= iend-8 .
  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
  * @return : nb of positions added */
-static U32 ZSTD_insertBt1(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertBt1(
                 const ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iend,
                 U32 const target,
@@ -553,6 +557,7 @@ static U32 ZSTD_insertBt1(
 }
 
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 void ZSTD_updateTree_internal(
                 ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iend,
@@ -578,7 +583,9 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
     ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
 }
 
-FORCE_INLINE_TEMPLATE U32
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32
 ZSTD_insertBtAndGetAllMatches (
                 ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                 ZSTD_matchState_t* ms,
@@ -819,7 +826,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
     U32 const ll0,
     U32 const lengthToBeat);
 
-FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_btGetAllMatches_internal(
         ZSTD_match_t* matches,
         ZSTD_matchState_t* ms,
         U32* nextToUpdate3,
@@ -1060,7 +1069,9 @@ listStats(const U32* table, int lastEltID)
 
 #endif
 
-FORCE_INLINE_TEMPLATE size_t
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t
 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                seqStore_t* seqStore,
                                U32 rep[ZSTD_REP_NUM],
@@ -1388,11 +1399,12 @@ size_t ZSTD_compressBlock_btopt(
  * only works on first block, with no dictionary and no ldm.
  * this function cannot error out, its narrow contract must be respected.
  */
-static void
-ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
-                     seqStore_t* seqStore,
-                     U32 rep[ZSTD_REP_NUM],
-               const void* src, size_t srcSize)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                          seqStore_t* seqStore,
+                          U32 rep[ZSTD_REP_NUM],
+                    const void* src, size_t srcSize)
 {
     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 5b217ac586c..15e4204024d 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -188,7 +188,7 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
 
     const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
 
-    BYTE* const oend = (BYTE*)dst + dstSize;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
 
     /* The fast decoding loop assumes 64-bit little-endian.
      * This condition is false on x32.
@@ -546,7 +546,7 @@ HUF_decompress1X1_usingDTable_internal_body(
     const HUF_DTable* DTable)
 {
     BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + dstSize;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
     const void* dtPtr = DTable + 1;
     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
     BIT_DStream_t bitD;
@@ -574,6 +574,7 @@ HUF_decompress4X1_usingDTable_internal_body(
 {
     /* Check */
     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
 
     {   const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
@@ -609,7 +610,7 @@ HUF_decompress4X1_usingDTable_internal_body(
 
         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
         if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
-        if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
+        assert(dstSize >= 6); /* validated above */
         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@@ -798,7 +799,7 @@ HUF_decompress4X1_usingDTable_internal_fast(
 {
     void const* dt = DTable + 1;
     const BYTE* const iend = (const BYTE*)cSrc + 6;
-    BYTE* const oend = (BYTE*)dst + dstSize;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
     HUF_DecompressFastArgs args;
     {   size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
         FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
@@ -1307,7 +1308,7 @@ HUF_decompress1X2_usingDTable_internal_body(
 
     /* decode */
     {   BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
+        BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
         DTableDesc const dtd = HUF_getDTableDesc(DTable);
@@ -1332,6 +1333,7 @@ HUF_decompress4X2_usingDTable_internal_body(
     const HUF_DTable* DTable)
 {
     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
 
     {   const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
@@ -1367,7 +1369,7 @@ HUF_decompress4X2_usingDTable_internal_body(
 
         if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
         if (opStart4 > oend) return ERROR(corruption_detected);     /* overflow */
-        if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
+        assert(dstSize >= 6 /* validated above */);
         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@@ -1612,7 +1614,7 @@ HUF_decompress4X2_usingDTable_internal_fast(
     HUF_DecompressFastLoopFn loopFn) {
     void const* dt = DTable + 1;
     const BYTE* const iend = (const BYTE*)cSrc + 6;
-    BYTE* const oend = (BYTE*)dst + dstSize;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
     HUF_DecompressFastArgs args;
     {
         size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 94eb95151b5..027a0f8cc74 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1058,7 +1058,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
     return (size_t)(op-ostart);
 }
 
-static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
                                         void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize,
                                   const void* dict, size_t dictSize,
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index 9e5c5144b7b..a4e5c859df5 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -902,6 +902,7 @@ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length
  * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
  */
 FORCE_NOINLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_execSequenceEnd(BYTE* op,
     BYTE* const oend, seq_t sequence,
     const BYTE** litPtr, const BYTE* const litLimit,
@@ -949,6 +950,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
  * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
  */
 FORCE_NOINLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
     const BYTE** litPtr, const BYTE* const litLimit,
@@ -994,6 +996,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
 }
 
 HINT_INLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_execSequence(BYTE* op,
     BYTE* const oend, seq_t sequence,
     const BYTE** litPtr, const BYTE* const litLimit,
@@ -1092,6 +1095,7 @@ size_t ZSTD_execSequence(BYTE* op,
 }
 
 HINT_INLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
     const BYTE** litPtr, const BYTE* const litLimit,
@@ -1403,7 +1407,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + maxDstSize;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
     const BYTE* litBufferEnd = dctx->litBufferEnd;
@@ -1612,7 +1616,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
     const BYTE* const litEnd = litPtr + dctx->litSize;
@@ -1700,14 +1704,16 @@ ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
 
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+FORCE_INLINE_TEMPLATE
+
+size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
                    const BYTE* const prefixStart, const BYTE* const dictEnd)
 {
     prefetchPos += sequence.litLength;
     {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
-        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+         * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
         PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
     }
     return prefetchPos + sequence.matchLength;
@@ -1727,7 +1733,7 @@ ZSTD_decompressSequencesLong_body(
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
     const BYTE* litBufferEnd = dctx->litBufferEnd;
@@ -2088,7 +2094,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
          * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
          */
         size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
-        size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
+        size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
         /* isLongOffset must be true if there are long offsets.
          * Offsets are long if they are larger than ZSTD_maxShortOffset().
          * We don't expect that to be the case in 64-bit mode.
@@ -2168,6 +2174,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 }
 
 
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
 {
     if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
@@ -2187,6 +2194,7 @@ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
     dctx->isFrameDecompression = 0;
     ZSTD_checkContinuity(dctx, dst, dstCapacity);
     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
+    FORWARD_IF_ERROR(dSize, "");
     dctx->previousDstEnd = (char*)dst + dSize;
     return dSize;
 }
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index dd173251d34..7a8a04e593c 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -124,6 +124,20 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
                const void* dict,size_t dictSize)
 {
     U32 const version = ZSTD_isLegacy(src, compressedSize);
+    char x;
+    /* Avoid passing NULL to legacy decoding. */
+    if (dst == NULL) {
+        assert(dstCapacity == 0);
+        dst = &x;
+    }
+    if (src == NULL) {
+        assert(compressedSize == 0);
+        src = &x;
+    }
+    if (dict == NULL) {
+        assert(dictSize == 0);
+        dict = &x;
+    }
     (void)dst; (void)dstCapacity; (void)dict; (void)dictSize;  /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
     switch(version)
     {
@@ -287,6 +301,12 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
 MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
                                         const void* dict, size_t dictSize)
 {
+    char x;
+    /* Avoid passing NULL to legacy decoding. */
+    if (dict == NULL) {
+        assert(dictSize == 0);
+        dict = &x;
+    }
     DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
     if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
     switch(newVersion)
@@ -346,6 +366,16 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
 MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
                                               ZSTD_outBuffer* output, ZSTD_inBuffer* input)
 {
+    static char x;
+    /* Avoid passing NULL to legacy decoding. */
+    if (output->dst == NULL) {
+        assert(output->size == 0);
+        output->dst = &x;
+    }
+    if (input->src == NULL) {
+        assert(input->size == 0);
+        input->src = &x;
+    }
     DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
     switch(version)
     {
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index 1a3aad07ed8..6cf51234a24 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -14,6 +14,7 @@
 ******************************************/
 #include <stddef.h>    /* size_t, ptrdiff_t */
 #include "zstd_v01.h"
+#include "../common/compiler.h"
 #include "../common/error_private.h"
 
 
@@ -2118,6 +2119,7 @@ size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSi
         }
         ctx->phase = 1;
         ctx->expected = ZSTD_blockHeaderSize;
+        if (ZSTDv01_isError(rSize)) return rSize;
         ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
         return rSize;
     }
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index e09bb4a248c..80615e556db 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -11,6 +11,7 @@
 
 #include <stddef.h>    /* size_t, ptrdiff_t */
 #include "zstd_v02.h"
+#include "../common/compiler.h"
 #include "../common/error_private.h"
 
 
@@ -71,20 +72,6 @@ extern "C" {
 #include <string.h>    /* memcpy */
 
 
-/******************************************
-*  Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-#  define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#  define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-#  define MEM_STATIC static __inline
-#else
-#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
 /****************************************************************
 *  Basic Types
 *****************************************************************/
@@ -3431,6 +3418,7 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
         }
         ctx->phase = 1;
         ctx->expected = ZSTD_blockHeaderSize;
+        if (ZSTD_isError(rSize)) return rSize;
         ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
         return rSize;
     }
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index b0d7f521ed0..082fe870502 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -11,6 +11,7 @@
 
 #include <stddef.h>    /* size_t, ptrdiff_t */
 #include "zstd_v03.h"
+#include "../common/compiler.h"
 #include "../common/error_private.h"
 
 
@@ -72,20 +73,6 @@ extern "C" {
 #include <string.h>    /* memcpy */
 
 
-/******************************************
-*  Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-#  define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#  define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-#  define MEM_STATIC static __inline
-#else
-#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
 /****************************************************************
 *  Basic Types
 *****************************************************************/
@@ -3071,6 +3058,7 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
         }
         ctx->phase = 1;
         ctx->expected = ZSTD_blockHeaderSize;
+        if (ZSTD_isError(rSize)) return rSize;
         ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
         return rSize;
     }
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index fa65160bcc8..0da316c158e 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -16,6 +16,7 @@
 #include <string.h>    /* memcpy */
 
 #include "zstd_v04.h"
+#include "../common/compiler.h"
 #include "../common/error_private.h"
 
 
@@ -3209,6 +3210,7 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
             }
             ctx->stage = ZSTDds_decodeBlockHeader;
             ctx->expected = ZSTD_blockHeaderSize;
+            if (ZSTD_isError(rSize)) return rSize;
             ctx->previousDstEnd = (char*)dst + rSize;
             return rSize;
         }
@@ -3536,8 +3538,8 @@ static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDs
 unsigned ZBUFFv04_isError(size_t errorCode) { return ERR_isError(errorCode); }
 const char* ZBUFFv04_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
 
-size_t ZBUFFv04_recommendedDInSize()  { return BLOCKSIZE + 3; }
-size_t ZBUFFv04_recommendedDOutSize() { return BLOCKSIZE; }
+size_t ZBUFFv04_recommendedDInSize(void)  { return BLOCKSIZE + 3; }
+size_t ZBUFFv04_recommendedDOutSize(void) { return BLOCKSIZE; }
 
 
 
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index 93a1169f3b6..44a877bf139 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -3600,6 +3600,7 @@ size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSi
             }
             dctx->stage = ZSTDv05ds_decodeBlockHeader;
             dctx->expected = ZSTDv05_blockHeaderSize;
+            if (ZSTDv05_isError(rSize)) return rSize;
             dctx->previousDstEnd = (char*)dst + rSize;
             return rSize;
         }
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index ac9ae987c47..00d6ef79aa2 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -14,6 +14,7 @@
 #include <stddef.h>    /* size_t, ptrdiff_t */
 #include <string.h>    /* memcpy */
 #include <stdlib.h>    /* malloc, free, qsort */
+#include "../common/compiler.h"
 #include "../common/error_private.h"
 
 
@@ -3736,6 +3737,7 @@ size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapac
             }
             dctx->stage = ZSTDds_decodeBlockHeader;
             dctx->expected = ZSTDv06_blockHeaderSize;
+            if (ZSTDv06_isError(rSize)) return rSize;
             dctx->previousDstEnd = (char*)dst + rSize;
             return rSize;
         }
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index b214ec08bdf..8778f079ca2 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -24,6 +24,7 @@
 #define HUFv07_STATIC_LINKING_ONLY   /* HUFv07_TABLELOG_ABSOLUTEMAX */
 #define ZSTDv07_STATIC_LINKING_ONLY
 
+#include "../common/compiler.h"
 #include "../common/error_private.h"
 
 
@@ -4006,8 +4007,8 @@ size_t ZSTDv07_decompressContinue(ZSTDv07_DCtx* dctx, void* dst, size_t dstCapac
             }
             dctx->stage = ZSTDds_decodeBlockHeader;
             dctx->expected = ZSTDv07_blockHeaderSize;
-            dctx->previousDstEnd = (char*)dst + rSize;
             if (ZSTDv07_isError(rSize)) return rSize;
+            dctx->previousDstEnd = (char*)dst + rSize;
             if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
             return rSize;
         }
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index a440ae38af2..1abc7df8b71 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -732,7 +732,7 @@ generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
             }
         } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
 
-        {   BYTE* const dictEnd = info.dictContent + info.dictContentSize;
+        {   BYTE* const dictEnd = ZSTD_maybeNullPtrAdd(info.dictContent, info.dictContentSize);
             size_t j;
             for (j = 0; j < matchLen; j++) {
                 if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index 8e0a9eaad8c..058fca8137b 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -250,10 +250,10 @@ def build_parser(args):
         action='store_true',
         help='Enable UBSAN')
     parser.add_argument(
-        '--enable-ubsan-pointer-overflow',
+        '--disable-ubsan-pointer-overflow',
         dest='ubsan_pointer_overflow',
-        action='store_true',
-        help='Enable UBSAN pointer overflow check (known failure)')
+        action='store_false',
+        help='Disable UBSAN pointer overflow check (known failure)')
     parser.add_argument(
         '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
     parser.add_argument(
@@ -383,8 +383,6 @@ def build_parser(args):
         raise RuntimeError('MSAN may not be used with any other sanitizers')
     if args.msan_track_origins and not args.msan:
         raise RuntimeError('--enable-msan-track-origins requires MSAN')
-    if args.ubsan_pointer_overflow and not args.ubsan:
-        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
     if args.sanitize_recover and not args.sanitize:
         raise RuntimeError('--enable-sanitize-recover but no sanitizers used')
 
diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c
index ede7080e3a2..ec0106c18c6 100644
--- a/tests/fuzz/sequence_compression_api.c
+++ b/tests/fuzz/sequence_compression_api.c
@@ -116,7 +116,7 @@ static size_t decodeSequences(void* dst, size_t nbSequences,
                 }
             }
             for (; j < matchLength; ++j) {
-                op[j] = op[j - generatedSequences[i].offset];
+                op[j] = op[(ptrdiff_t)(j - generatedSequences[i].offset)];
             }
             op += j;
             FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 4a68ff73168..4a540270fdd 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -328,7 +328,7 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
 
         if (seqs[i].offset != 0) {
             for (j = 0; j < seqs[i].matchLength; ++j)
-                dst[j] = dst[j - seqs[i].offset];
+                dst[j] = dst[(ptrdiff_t)(j - seqs[i].offset)];
             dst += seqs[i].matchLength;
             src += seqs[i].matchLength;
             size -= seqs[i].matchLength;
@@ -3684,11 +3684,13 @@ static int basicUnitTests(U32 const seed, double compressibility)
 
         /* Test with block delimiters roundtrip */
         seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
+        CHECK_Z(seqsSize);
         FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters);
         assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
 
         /* Test no block delimiters roundtrip */
         seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
+        CHECK_Z(seqsSize);
         FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters);
         assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
 

From e8ff7d18ebdb7af55ad73f92c5192e74bdc85ca2 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 7 Oct 2023 20:19:45 -0700
Subject: [PATCH 098/283] removed FlexArray pattern from CCtxPool

within ZSTDMT_.
This pattern is flagged by less forgiving variants of ubsan
notably used during compilation of the Linux Kernel.

There are 2 other places in the code where this pattern is used.
This fixes just one of them.
---
 lib/compress/zstd_compress.c   |  1 +
 lib/compress/zstdmt_compress.c | 29 ++++++++++++++++++-----------
 tests/fuzzer.c                 | 14 ++++++++------
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index a51c0079258..b79266fdb7c 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -178,6 +178,7 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
 
 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
 {
+    DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx);
     if (cctx==NULL) return 0;   /* support free on NULL */
     RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                     "not compatible with static CCtx");
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index add99d769be..236daab2627 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -350,15 +350,16 @@ typedef struct {
     int totalCCtx;
     int availCCtx;
     ZSTD_customMem cMem;
-    ZSTD_CCtx* cctx[1];   /* variable size */
+    ZSTD_CCtx** cctxs;
 } ZSTDMT_CCtxPool;
 
-/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+/* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
     int cid;
     for (cid=0; cid<pool->totalCCtx; cid++)
-        ZSTD_freeCCtx(pool->cctx[cid]);  /* note : compatible with free on NULL */
+        ZSTD_freeCCtx(pool->cctxs[cid]);  /* note : compatible with free on NULL */
+    ZSTD_customFree(pool->cctxs, pool->cMem);
     ZSTD_pthread_mutex_destroy(&pool->poolMutex);
     ZSTD_customFree(pool, pool->cMem);
 }
@@ -373,14 +374,19 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
     assert(nbWorkers > 0);
     if (!cctxPool) return NULL;
     if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
+        ZSTDMT_freeCCtxPool(cctxPool);
+        return NULL;
+    }
+    cctxPool->totalCCtx = nbWorkers;
+    cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+    if (!cctxPool->cctxs) {
         ZSTD_customFree(cctxPool, cMem);
         return NULL;
     }
     cctxPool->cMem = cMem;
-    cctxPool->totalCCtx = nbWorkers;
+    cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+    if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
     cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
-    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
-    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
     DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
     return cctxPool;
 }
@@ -404,14 +410,15 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
     {   unsigned const nbWorkers = cctxPool->totalCCtx;
         size_t const poolSize = sizeof(*cctxPool)
                                 + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
-        unsigned u;
+        size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
         size_t totalCCtxSize = 0;
+        unsigned u;
         for (u=0; u<nbWorkers; u++) {
-            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
         }
         ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
         assert(nbWorkers > 0);
-        return poolSize + totalCCtxSize;
+        return poolSize + arraySize + totalCCtxSize;
     }
 }
 
@@ -421,7 +428,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
     if (cctxPool->availCCtx) {
         cctxPool->availCCtx--;
-        {   ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+        {   ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
             ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
             return cctx;
     }   }
@@ -435,7 +442,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
     if (cctx==NULL) return;   /* compatibility with release on NULL */
     ZSTD_pthread_mutex_lock(&pool->poolMutex);
     if (pool->availCCtx < pool->totalCCtx)
-        pool->cctx[pool->availCCtx++] = cctx;
+        pool->cctxs[pool->availCCtx++] = cctx;
     else {
         /* pool overflow : should not happen, since totalCCtx==nbWorkers */
         DEBUGLOG(4, "CCtx pool overflow : free cctx");
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index c920fbb130c..d70a669047b 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1119,6 +1119,9 @@ static int basicUnitTests(U32 const seed, double compressibility)
         size_t const srcSize1 = kWindowSize / 2;
         size_t const srcSize2 = kWindowSize * 10;
 
+        CHECK(cctx!=NULL);
+        CHECK(dctx!=NULL);
+        CHECK(dict!=NULL);
         if (CNBuffSize < dictSize) goto _output_error;
 
         RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed);
@@ -1140,6 +1143,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
         cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize1);
         CHECK_Z(cSize);
         CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize));
+
         cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize2);
         /* Streaming decompression to catch out of bounds offsets. */
         {
@@ -1153,24 +1157,22 @@ static int basicUnitTests(U32 const seed, double compressibility)
         CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2));
         /* Round trip once with a dictionary. */
         CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, dictSize));
-        {
-            ZSTD_inBuffer in = {CNBuffer, srcSize1, 0};
+        {   ZSTD_inBuffer in = {CNBuffer, srcSize1, 0};
             ZSTD_outBuffer out = {compressedBuffer, compressedBufferSize, 0};
             CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
             CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
             cSize = out.pos;
         }
         CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize));
-        {
-            ZSTD_inBuffer in = {CNBuffer, srcSize2, 0};
+
+        {   ZSTD_inBuffer in = {CNBuffer, srcSize2, 0};
             ZSTD_outBuffer out = {compressedBuffer, compressedBufferSize, 0};
             CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
             CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
             cSize = out.pos;
         }
         /* Streaming decompression to catch out of bounds offsets. */
-        {
-            ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
+        {   ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
             ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0};
             size_t const dSize = ZSTD_decompressStream(dctx, &out, &in);
             CHECK_Z(dSize);

From c87ad5bdb59c95c16871ff99d83ec3b09bd742c8 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 7 Oct 2023 23:29:42 -0700
Subject: [PATCH 099/283] fixes suggested by @ebiggers

---
 lib/compress/zstdmt_compress.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 236daab2627..1b47734c29d 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -369,18 +369,18 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
                                               ZSTD_customMem cMem)
 {
-    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
-        sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+    ZSTDMT_CCtxPool* const cctxPool = 
+        (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
     assert(nbWorkers > 0);
     if (!cctxPool) return NULL;
     if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
-        ZSTDMT_freeCCtxPool(cctxPool);
+        ZSTD_customFree(cctxPool, cMem);
         return NULL;
     }
     cctxPool->totalCCtx = nbWorkers;
     cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
     if (!cctxPool->cctxs) {
-        ZSTD_customFree(cctxPool, cMem);
+        ZSTDMT_freeCCtxPool(cctxPool);
         return NULL;
     }
     cctxPool->cMem = cMem;
@@ -408,8 +408,7 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
     {   unsigned const nbWorkers = cctxPool->totalCCtx;
-        size_t const poolSize = sizeof(*cctxPool)
-                                + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
+        size_t const poolSize = sizeof(*cctxPool);
         size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
         size_t totalCCtxSize = 0;
         unsigned u;

From ea4027c003d31bb75d24a2284d06ed4c06300f59 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 7 Oct 2023 23:32:22 -0700
Subject: [PATCH 100/283] removed unused macro constant

---
 lib/compress/zstdmt_compress.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 1b47734c29d..529fb61587f 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -15,17 +15,13 @@
 #endif
 
 
-/* ======   Constants   ====== */
-#define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
 /* ======   Dependencies   ====== */
-#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
 #include "../common/mem.h"         /* MEM_STATIC */
 #include "../common/pool.h"        /* threadpool */
 #include "../common/threading.h"   /* mutex */
-#include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
 #include "zstd_ldm.h"
 #include "zstdmt_compress.h"
 

From 6bb1688c1a13a9368d7c1b6f992e0a0fa7c1cbba Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sun, 8 Oct 2023 00:25:17 -0700
Subject: [PATCH 101/283] extended the fix to ZSTDMT's Buffer Pool

---
 lib/compress/zstdmt_compress.c | 65 ++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 529fb61587f..6b3391a0f8d 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -96,18 +96,39 @@ typedef struct ZSTDMT_bufferPool_s {
     unsigned totalBuffers;
     unsigned nbBuffers;
     ZSTD_customMem cMem;
-    buffer_t bTable[1];   /* variable size */
+    buffer_t* buffers;
 } ZSTDMT_bufferPool;
 
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    unsigned u;
+    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+    if (!bufPool) return;   /* compatibility with free on NULL */
+    if (bufPool->buffers) {
+        for (u=0; u<bufPool->totalBuffers; u++) {
+            DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+            ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+        }
+        ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+    }
+    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+    ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
 static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
 {
-    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
-        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+    ZSTDMT_bufferPool* const bufPool = 
+        (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
     if (bufPool==NULL) return NULL;
     if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
         ZSTD_customFree(bufPool, cMem);
         return NULL;
     }
+    bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+    if (bufPool->buffers==NULL) {
+        ZSTDMT_freeBufferPool(bufPool);
+        return NULL;
+    }
     bufPool->bufferSize = 64 KB;
     bufPool->totalBuffers = maxNbBuffers;
     bufPool->nbBuffers = 0;
@@ -115,32 +136,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_cu
     return bufPool;
 }
 
-static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
-{
-    unsigned u;
-    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
-    if (!bufPool) return;   /* compatibility with free on NULL */
-    for (u=0; u<bufPool->totalBuffers; u++) {
-        DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
-        ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
-    }
-    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
-    ZSTD_customFree(bufPool, bufPool->cMem);
-}
-
 /* only works at initialization, not during compression */
 static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
 {
-    size_t const poolSize = sizeof(*bufPool)
-                          + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+    size_t const poolSize = sizeof(*bufPool);
+    size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
     unsigned u;
     size_t totalBufferSize = 0;
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     for (u=0; u<bufPool->totalBuffers; u++)
-        totalBufferSize += bufPool->bTable[u].capacity;
+        totalBufferSize += bufPool->buffers[u].capacity;
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
 
-    return poolSize + totalBufferSize;
+    return poolSize + arraySize + totalBufferSize;
 }
 
 /* ZSTDMT_setBufferSize() :
@@ -183,9 +191,9 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
     DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers) {   /* try to use an existing buffer */
-        buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
+        buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
         size_t const availBufferSize = buf.capacity;
-        bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
+        bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
         if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
             /* large enough, but not too much */
             DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@@ -246,14 +254,14 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
     if (buf.start == NULL) return;   /* compatible with release on NULL */
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers < bufPool->totalBuffers) {
-        bufPool->bTable[bufPool->nbBuffers++] = buf;  /* stored for later use */
+        bufPool->buffers[bufPool->nbBuffers++] = buf;  /* stored for later use */
         DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
                     (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
         ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
         return;
     }
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-    /* Reached bufferPool capacity (should not happen) */
+    /* Reached bufferPool capacity (note: should not happen) */
     DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
     ZSTD_customFree(buf.start, bufPool->cMem);
 }
@@ -353,10 +361,13 @@ typedef struct {
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
     int cid;
-    for (cid=0; cid<pool->totalCCtx; cid++)
-        ZSTD_freeCCtx(pool->cctxs[cid]);  /* note : compatible with free on NULL */
-    ZSTD_customFree(pool->cctxs, pool->cMem);
+    if (!pool) return;
     ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+    if (pool->cctxs) {
+        for (cid=0; cid<pool->totalCCtx; cid++)
+            ZSTD_freeCCtx(pool->cctxs[cid]);  /* free compatible with NULL */
+        ZSTD_customFree(pool->cctxs, pool->cMem);
+    }
     ZSTD_customFree(pool, pool->cMem);
 }
 

From e590c8a0e3b2ecdde5f63d385fa7f9bd759721d3 Mon Sep 17 00:00:00 2001
From: Xavier Mitault 
Date: Wed, 18 Oct 2023 13:22:15 +0200
Subject: [PATCH 102/283] Add doc on how to use it with cmake FetchContent

---
 build/cmake/README.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/build/cmake/README.md b/build/cmake/README.md
index a460dd16187..022e6168869 100644
--- a/build/cmake/README.md
+++ b/build/cmake/README.md
@@ -41,6 +41,31 @@ cmake -DZSTD_BUILD_TESTS=ON -DZSTD_LEGACY_SUPPORT=OFF ..
 make
 ```
 
+### how to use it with CMake FetchContent
+
+For all options available, you can see it on <https://github.com/facebook/zstd/blob/dev/build/cmake/lib/CMakeLists.txt>
+```cmake
+include(FetchContent)
+
+set(ZSTD_BUILD_STATIC ON)
+set(ZSTD_BUILD_SHARED OFF)
+
+FetchContent_Declare(
+    zstd
+    URL "https://github.com/facebook/zstd/releases/download/v1.5.5/zstd-1.5.5.tar.gz"
+    DOWNLOAD_EXTRACT_TIMESTAMP TRUE
+    SOURCE_SUBDIR build/cmake
+)
+
+FetchContent_MakeAvailable(zstd)
+
+target_link_libraries(
+    ${PROJECT_NAME}
+    PRIVATE
+    libzstd_static
+)
+```
+
 ### referring
 [Looking for a 'cmake clean' command to clear up CMake output](https://stackoverflow.com/questions/9680420/looking-for-a-cmake-clean-command-to-clear-up-cmake-output)
 

From d988e00a7fe551785bc8c3de8cd5e4266280ce6d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 9 Oct 2023 16:47:52 -0700
Subject: [PATCH 103/283] baby-step towards solving flexArray issue #3785

the flexArray in structure FSE_DecompressWksp
is just a way to derive a pointer easily,
without risk/complexity of calculating it manually.

Not sure if this change is good enough to avoid ubsan warnings though.
---
 lib/common/fse_decompress.c | 28 ++++++++++++++--------------
 lib/compress/fse_compress.c | 13 +++++++------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index 1e1c9f92d6b..f9bee931f1b 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -84,7 +84,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
                     symbolNext[s] = 1;
                 } else {
                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                    symbolNext[s] = normalizedCounter[s];
+                    symbolNext[s] = (U16)normalizedCounter[s];
         }   }   }
         ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
     }
@@ -99,8 +99,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
          * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
          * our buffer to handle the over-write.
          */
-        {
-            U64 const add = 0x0101010101010101ull;
+        {   U64 const add = 0x0101010101010101ull;
             size_t pos = 0;
             U64 sv = 0;
             U32 s;
@@ -111,9 +110,8 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
                 for (i = 8; i < n; i += 8) {
                     MEM_write64(spread + pos + i, sv);
                 }
-                pos += n;
-            }
-        }
+                pos += (size_t)n;
+        }   }
         /* Now we spread those positions across the table.
          * The benefit of doing it in two stages is that we avoid the
          * variable size inner loop, which caused lots of branch misses.
@@ -232,12 +230,13 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
             break;
     }   }
 
-    return op-ostart;
+    assert(op >= ostart);
+    return (size_t)(op-ostart);
 }
 
 typedef struct {
     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
-    FSE_DTable dtable[1]; /* Dynamically sized */
+    FSE_DTable dtable[1]; /* actual size is dynamic - member just helps get a pointer easily */
 } FSE_DecompressWksp;
 
 
@@ -252,13 +251,14 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
     FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+    FSE_DTable* const dtable = wksp->dtable;
 
     DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
     if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
 
     /* normal FSE decoding mode */
-    {
-        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+    {   size_t const NCountLength =
+            FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
         if (FSE_isError(NCountLength)) return NCountLength;
         if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
         assert(NCountLength <= cSrcSize);
@@ -271,16 +271,16 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
     workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
     wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
 
-    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+    CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
 
     {
-        const void* ptr = wksp->dtable;
+        const void* ptr = dtable;
         const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
         const U32 fastMode = DTableH->fastMode;
 
         /* select fast mode (static) */
-        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
-        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
     }
 }
 
diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index 5d3770808dd..158ba80ca94 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -225,8 +225,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
                                    + 4 /* bitCount initialized at 4 */
                                    + 2 /* first two symbols may use one additional bit each */) / 8)
-                                    + 1 /* round up to whole nb bytes */
-                                    + 2 /* additional two bytes for bitstream flush */;
+                                   + 1 /* round up to whole nb bytes */
+                                   + 2 /* additional two bytes for bitstream flush */;
     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
 }
 
@@ -255,7 +255,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
     /* Init */
     remaining = tableSize+1;   /* +1 for extra accuracy */
     threshold = tableSize;
-    nbBits = tableLog+1;
+    nbBits = (int)tableLog+1;
 
     while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
         if (previousIs0) {
@@ -274,7 +274,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
             }
             while (symbol >= start+3) {
                 start+=3;
-                bitStream += 3 << bitCount;
+                bitStream += 3U << bitCount;
                 bitCount += 2;
             }
             bitStream += (symbol-start) << bitCount;
@@ -294,7 +294,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
             count++;   /* +1 for extra accuracy */
             if (count>=threshold)
                 count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
-            bitStream += count << bitCount;
+            bitStream += (U32)count << bitCount;
             bitCount  += nbBits;
             bitCount  -= (count>8);
     out+= (bitCount+7) /8;
 
-    return (out-ostart);
+    assert(out >= ostart);
+    return (size_t)(out-ostart);
 }
 
 

From 24dabde507c8d141e282e568be21e648987a7d77 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Wed, 18 Oct 2023 22:45:57 -0700
Subject: [PATCH 104/283] revert to manually defining DTable

thus avoiding the analyzer and ubsan to associate DTable to a size of 1.
---
 lib/common/fse.h            |  5 +++--
 lib/common/fse_decompress.c | 12 +++++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/common/fse.h b/lib/common/fse.h
index 02a1f0bc530..2ae128e60db 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -229,6 +229,7 @@ If there is an error, the function will return an error code, which can be teste
 
 #endif  /* FSE_H */
 
+
 #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
 #define FSE_H_FSE_STATIC_LINKING_ONLY
 
@@ -464,13 +465,13 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
     FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
     const U16* const stateTable = (const U16*)(statePtr->stateTable);
     U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
-    BIT_addBits(bitC, statePtr->value, nbBitsOut);
+    BIT_addBits(bitC,  (size_t)statePtr->value, nbBitsOut);
     statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
 }
 
 MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
 {
-    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+    BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
     BIT_flushBits(bitC);
 }
 
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index f9bee931f1b..0dcc4640d09 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -22,8 +22,7 @@
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #include "error_private.h"
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"
+#include "zstd_deps.h"  /* ZSTD_memcpy */
 #include "bits.h"       /* ZSTD_highbit32 */
 
 
@@ -236,7 +235,6 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
 
 typedef struct {
     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
-    FSE_DTable dtable[1]; /* actual size is dynamic - member just helps get a pointer easily */
 } FSE_DecompressWksp;
 
 
@@ -251,11 +249,15 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
     FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
-    FSE_DTable* const dtable = wksp->dtable;
+    size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
+    FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
 
-    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+    FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
     if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
 
+    /* correct offset to dtable depends on this property */
+    FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
+
     /* normal FSE decoding mode */
     {   size_t const NCountLength =
             FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);

From af971cec6572e156e26bc403cb42396e7d908ba1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Oct 2023 05:56:05 +0000
Subject: [PATCH 105/283] Bump actions/checkout from 4.1.0 to 4.1.1

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.0 to 4.1.1.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/8ade135a41bc03ea155e62e844d188df1ea18608...b4ffde65f46336ab88eb53be808477a3936bae11)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/dev-long-tests.yml          | 50 ++++++-------
 .github/workflows/dev-short-tests.yml         | 74 +++++++++----------
 .../workflows/publish-release-artifacts.yml   |  2 +-
 .github/workflows/scorecards.yml              |  2 +-
 .github/workflows/windows-artifacts.yml       |  2 +-
 5 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index 2f2793bcd05..dbef98982d9 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -15,7 +15,7 @@ jobs:
   make-all:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: make all
       run: make all
 
@@ -26,7 +26,7 @@ jobs:
       DEVNULLRIGHTS: 1
       READFROMBLOCKDEVICE: 1
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: make test
       run: make test
 
@@ -34,7 +34,7 @@ jobs:
   make-test-osx:
     runs-on: macos-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: OS-X test
       run: make test # make -c lib all doesn't work because of the fact that it's not a tty
 
@@ -45,7 +45,7 @@ jobs:
       DEVNULLRIGHTS: 1
       READFROMBLOCKDEVICE: 1
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: make test
       run: |
         sudo apt-get -qqq update
@@ -55,21 +55,21 @@ jobs:
   no-intrinsics-fuzztest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: no intrinsics fuzztest
       run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest
 
   tsan-zstreamtest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: thread sanitizer zstreamtest
       run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream
 
   ubsan-zstreamtest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: undefined behavior sanitizer zstreamtest
       run: CC=clang make uasan-test-zstream
 
@@ -77,7 +77,7 @@ jobs:
   tsan-fuzztest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: thread sanitizer fuzztest
       run: CC=clang make tsan-fuzztest
 
@@ -85,7 +85,7 @@ jobs:
   big-tests-zstreamtest32:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: zstream tests in 32bit mode, with big tests
       run: |
         sudo apt-get -qqq update
@@ -96,7 +96,7 @@ jobs:
   gcc-8-asan-ubsan-testzstd:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: gcc-8 + ASan + UBSan + Test Zstd
       # See https://askubuntu.com/a/1428822
       run: |
@@ -108,14 +108,14 @@ jobs:
   clang-asan-ubsan-testzstd:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: clang + ASan + UBSan + Test Zstd
       run: CC=clang make -j uasan-test-zstd 
Date: Mon, 23 Oct 2023 17:38:09 +0200
Subject: [PATCH 106/283] Add target_include_directories because windows and
 macos need it for me

---
 build/cmake/README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/build/cmake/README.md b/build/cmake/README.md
index 022e6168869..4c9d3a08b68 100644
--- a/build/cmake/README.md
+++ b/build/cmake/README.md
@@ -64,6 +64,13 @@ target_link_libraries(
     PRIVATE
     libzstd_static
 )
+
+# On windows and macos this is needed
+target_include_directories(
+    ${PROJECT_NAME}
+    PRIVATE
+    ${zstd_SOURCE_DIR}/lib
+)
 ```
 
 ### referring

From 9446b1910cab25dd2eb93d76ca5e6168a6e70a51 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 30 Oct 2023 05:53:19 +0000
Subject: [PATCH 107/283] Bump ossf/scorecard-action from 2.2.0 to 2.3.1

Bumps [ossf/scorecard-action](https://github.com/ossf/scorecard-action) from 2.2.0 to 2.3.1.
- [Release notes](https://github.com/ossf/scorecard-action/releases)
- [Changelog](https://github.com/ossf/scorecard-action/blob/main/RELEASE.md)
- [Commits](https://github.com/ossf/scorecard-action/compare/08b4669551908b1024bb425080c797723083c031...0864cf19026789058feabb7e87baa5f140aac736)

---
updated-dependencies:
- dependency-name: ossf/scorecard-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/scorecards.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index b536c54ebdb..f8f1f2d80e1 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -32,7 +32,7 @@ jobs:
           persist-credentials: false
 
       - name: "Run analysis"
-        uses: ossf/scorecard-action@08b4669551908b1024bb425080c797723083c031 # tag=v2.2.0
+        uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # tag=v2.3.1
         with:
           results_file: results.sarif
           results_format: sarif

From b38d87b476b804d7948928d298c784deb875a93c Mon Sep 17 00:00:00 2001
From: elasota 
Date: Tue, 31 Oct 2023 01:17:23 -0400
Subject: [PATCH 108/283] Clarify that the log2 of the largest possible symbol
 is the maximum number of bits consumed

---
 doc/zstd_compression_format.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index cd7308de196..7b29ccec648 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -1083,7 +1083,7 @@ It depends on :
   Presuming an `Accuracy_Log` of 8,
   and presuming 100 probabilities points have already been distributed,
   the decoder may read any value from `0` to `256 - 100 + 1 == 157` (inclusive).
-  Therefore, it must read `log2sup(157) == 8` bits.
+  Therefore, it may read up to `log2sup(157) == 8` bits.
 
 - Value decoded : small values use 1 less bit :
   __example__ :

From 324cce4996d24af7b2cd86cf5eb1b9bd80de0a47 Mon Sep 17 00:00:00 2001
From: elasota 
Date: Tue, 31 Oct 2023 11:42:00 -0400
Subject: [PATCH 109/283] Add definition of "log2sup" function

---
 doc/zstd_compression_format.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 7b29ccec648..0532a846f45 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -1083,7 +1083,8 @@ It depends on :
   Presuming an `Accuracy_Log` of 8,
   and presuming 100 probabilities points have already been distributed,
   the decoder may read any value from `0` to `256 - 100 + 1 == 157` (inclusive).
-  Therefore, it may read up to `log2sup(157) == 8` bits.
+  Therefore, it may read up to `log2sup(157) == 8` bits, where `log2sup(N)`
+  is the smallest integer `T` that satisfies `(1 << T) > N`.
 
 - Value decoded : small values use 1 less bit :
   __example__ :

From 4502ca5f422a4e3f0b8980d5a365fcc3f62e97e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20Gr=C3=BCninger?= 
Date: Tue, 31 Oct 2023 23:30:43 +0100
Subject: [PATCH 110/283] [cmake] Require CMake version 3.5 or newer

More recent versions of CMake emit the following warning:
CMake Deprecation Warning at cmake/CMakeLists.txt:10 (cmake_minimum_required):
  Compatibility with CMake < 3.5 will be removed from a future version of
  CMake.

  Update the VERSION argument  value or use a ... suffix to tell
  CMake that the project does not need compatibility with older versions.
---
 build/cmake/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt
index 0bffc87d933..7377d4550bf 100644
--- a/build/cmake/CMakeLists.txt
+++ b/build/cmake/CMakeLists.txt
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # ################################################################
 
-cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
 
 # As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies.
 # Set and use the newest cmake policies that are validated to work

From f013b1b504cc2065e8860cf90461cef9364d96b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20Gr=C3=BCninger?= 
Date: Wed, 1 Nov 2023 08:51:45 +0100
Subject: [PATCH 111/283] [cmake] Remove code for compatibility with CMake <
 3.0

The required version of CMake is now 3.5.
---
 build/cmake/CMakeLists.txt | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt
index 7377d4550bf..023b998f556 100644
--- a/build/cmake/CMakeLists.txt
+++ b/build/cmake/CMakeLists.txt
@@ -13,10 +13,8 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
 # Set and use the newest cmake policies that are validated to work
 set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3")
 set(ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION "13") #Policies never changed at PATCH level
-if("${CMAKE_MAJOR_VERSION}" LESS 3)
-  set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
-elseif( "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
-        "${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}")
+if("${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
+       "${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}")
     set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
 else()
     set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0")
@@ -32,24 +30,13 @@ set(LIBRARY_DIR ${ZSTD_SOURCE_DIR}/lib)
 include(GetZstdLibraryVersion)
 GetZstdLibraryVersion(${LIBRARY_DIR}/zstd.h zstd_VERSION_MAJOR zstd_VERSION_MINOR zstd_VERSION_PATCH)
 
-if( CMAKE_MAJOR_VERSION LESS 3 )
-  ## Provide cmake 3+ behavior for older versions of cmake
-  project(zstd)
-  set(PROJECT_VERSION_MAJOR ${zstd_VERSION_MAJOR})
-  set(PROJECT_VERSION_MINOR ${zstd_VERSION_MINOR})
-  set(PROJECT_VERSION_PATCH ${zstd_VERSION_PATCH})
-  set(PROJECT_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
-  enable_language(C)   # Main library is in C
-  enable_language(ASM) # And ASM
-  enable_language(CXX) # Testing contributed code also utilizes CXX
-else()
-  project(zstd
-    VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}"
-    LANGUAGES C   # Main library is in C
-              ASM # And ASM
-              CXX # Testing contributed code also utilizes CXX
-    )
-endif()
+project(zstd
+  VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}"
+  LANGUAGES C   # Main library is in C
+            ASM # And ASM
+            CXX # Testing contributed code also utilizes CXX
+  )
+
 message(STATUS "ZSTD VERSION: ${zstd_VERSION}")
 set(zstd_HOMEPAGE_URL "https://facebook.github.io/zstd")
 set(zstd_DESCRIPTION  "Zstandard is a real-time compression algorithm, providing high compression ratios.")

From c53d650d9a047ab12b2c7e5808878aff37d3cfc5 Mon Sep 17 00:00:00 2001
From: Theodore Tsirpanis 
Date: Tue, 7 Nov 2023 14:35:43 +0200
Subject: [PATCH 112/283] Export a `zstd::libzstd` target if only static or
 dynamic linkage is specified.

---
 build/cmake/CMakeLists.txt      | 11 ++++++-----
 build/cmake/zstdConfig.cmake    |  1 -
 build/cmake/zstdConfig.cmake.in | 16 ++++++++++++++++
 3 files changed, 22 insertions(+), 6 deletions(-)
 delete mode 100644 build/cmake/zstdConfig.cmake
 create mode 100644 build/cmake/zstdConfig.cmake.in

diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt
index 0bffc87d933..6f68bc4255c 100644
--- a/build/cmake/CMakeLists.txt
+++ b/build/cmake/CMakeLists.txt
@@ -193,10 +193,6 @@ export(EXPORT zstdExports
     FILE "${CMAKE_CURRENT_BINARY_DIR}/zstdTargets.cmake"
     NAMESPACE zstd::
     )
-configure_file(zstdConfig.cmake
-    "${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
-    COPYONLY
-    )
 
 # A Package Config file that works from the installation directory
 set(ConfigPackageLocation ${CMAKE_INSTALL_LIBDIR}/cmake/zstd)
@@ -205,8 +201,13 @@ install(EXPORT zstdExports
     NAMESPACE zstd::
     DESTINATION ${ConfigPackageLocation}
     )
+configure_package_config_file(
+    zstdConfig.cmake.in
+    "${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
+    INSTALL_DESTINATION ${ConfigPackageLocation}
+)
 install(FILES
-    zstdConfig.cmake
+    "${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
     "${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
     DESTINATION ${ConfigPackageLocation}
     )
diff --git a/build/cmake/zstdConfig.cmake b/build/cmake/zstdConfig.cmake
deleted file mode 100644
index ebbfcc38f6f..00000000000
--- a/build/cmake/zstdConfig.cmake
+++ /dev/null
@@ -1 +0,0 @@
-include("${CMAKE_CURRENT_LIST_DIR}/zstdTargets.cmake")
diff --git a/build/cmake/zstdConfig.cmake.in b/build/cmake/zstdConfig.cmake.in
new file mode 100644
index 00000000000..752f3ab11ce
--- /dev/null
+++ b/build/cmake/zstdConfig.cmake.in
@@ -0,0 +1,16 @@
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+if(@ZSTD_MULTITHREAD_SUPPORT@ AND "@UNIX@")
+  find_dependency(Threads)
+endif()
+
+include("${CMAKE_CURRENT_LIST_DIR}/zstdTargets.cmake")
+
+if(@ZSTD_BUILD_SHARED@ AND NOT @ZSTD_BUILD_STATIC@)
+  add_library(zstd::libzstd ALIAS zstd::libzstd_shared)
+elseif(NOT @ZSTD_BUILD_SHARED@ AND @ZSTD_BUILD_STATIC@)
+  add_library(zstd::libzstd ALIAS zstd::libzstd_static)
+endif()
+
+check_required_components("zstd")

From 98d8ad27a2b2a2fc75e0594bae992824c470f61c Mon Sep 17 00:00:00 2001
From: John Hughes 
Date: Wed, 8 Nov 2023 09:08:21 +0000
Subject: [PATCH 113/283] Add Bazel module instructions to README.md

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 89857bf9bee..0f7478e1065 100644
--- a/README.md
+++ b/README.md
@@ -198,6 +198,10 @@ Going into `build` directory, you will find additional possibilities:
 You can build the zstd binary via buck by executing: `buck build programs:zstd` from the root of the repo.
 The output binary will be in `buck-out/gen/programs/`.
 
+### Bazel
+
+You can easily integrate zstd into your Bazel project by using the module hosted on the [Bazel Central Repository](https://registry.bazel.build/modules/zstd).
+
 ## Testing
 
 You can run quick local smoke tests by running `make check`.

From e61e3ff15208432cecf09ede09e8ebcf1d126bdd Mon Sep 17 00:00:00 2001
From: elasota <1137273+elasota@users.noreply.github.com>
Date: Wed, 8 Nov 2023 20:06:58 -0500
Subject: [PATCH 114/283] Clarify that decoding too many Huffman weights is a
 failure condition

---
 doc/zstd_compression_format.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 0532a846f45..b64ddc3bff2 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -1353,6 +1353,9 @@ If updating state after decoding a symbol would require more bits than
 remain in the stream, it is assumed that extra bits are 0.  Then,
 symbols for each of the final states are decoded and the process is complete.
 
+If this process would produce more weights than the maximum number of decoded
+weights (255), then the data is considered corrupted.
+
 #### Conversion from weights to Huffman prefix codes
 
 All present symbols shall now have a `Weight` value.

From 52e41b9ac8010da90bbe97421cca533afd6914c0 Mon Sep 17 00:00:00 2001
From: elasota <1137273+elasota@users.noreply.github.com>
Date: Thu, 9 Nov 2023 12:22:27 -0500
Subject: [PATCH 115/283] Fix malformed state table

---
 doc/zstd_compression_format.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 0532a846f45..b362b206dfd 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -1187,9 +1187,9 @@ Baseline is assigned starting from the higher states using fewer bits,
 increasing at each state, then resuming at the first state,
 each state takes its allocated width from Baseline.
 
-| state value      |   1   |  39   |   77   |  84  |  122   |
 | state order      |   0   |   1   |    2   |   3  |    4   |
 | ---------------- | ----- | ----- | ------ | ---- | ------ |
+| state value      |   1   |  39   |   77   |  84  |  122   |
 | width            |  32   |  32   |   32   |  16  |   16   |
 | `Number_of_Bits` |   5   |   5   |    5   |   4  |    4   |
 | range number     |   2   |   4   |    6   |   0  |    1   |

From c5bf96fb74378aaefec44f30f67f88f3f70f8e4e Mon Sep 17 00:00:00 2001
From: elasota <1137273+elasota@users.noreply.github.com>
Date: Mon, 13 Nov 2023 00:03:56 -0500
Subject: [PATCH 116/283] Clarify that a non-zero probability for an invalid
 symbol is invalid

---
 doc/zstd_compression_format.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 0532a846f45..216d89ed150 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -1124,6 +1124,9 @@ When last symbol reaches cumulated total of `1 << Accuracy_Log`,
 decoding is complete.
 If the last symbol makes cumulated total go above `1 << Accuracy_Log`,
 distribution is considered corrupted.
+If this process results in a non-zero probability for a value outside of the
+valid range of values that the FSE table is defined for, even if that value is
+not used, then the data is considered corrupted.
 
 Then the decoder can tell how many bytes were used in this process,
 and how many symbols are present.

From 592b1acb1804f18e42412607a81c636dc1d4e850 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 13 Nov 2023 15:42:07 -0800
Subject: [PATCH 117/283] update xxhash to v0.8.2

List of updates : https://github.com/Cyan4973/xxHash/releases/tag/v0.8.2

This is also a preparation task before taking care of #3819
---
 lib/common/xxhash.c |   41 +-
 lib/common/xxhash.h | 3392 ++++++++++++++++++++++++++++++-------------
 2 files changed, 2412 insertions(+), 1021 deletions(-)

diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index fd237c9062a..1a8f735ba2d 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -1,17 +1,36 @@
 /*
- *  xxHash - Fast Hash algorithm
- *  Copyright (c) Meta Platforms, Inc. and affiliates.
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2023 Yann Collet
  *
- *  You can contact the author at :
- *  - xxHash homepage: https://cyan4973.github.io/xxHash/
- *  - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-*/
-
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other materials provided with the
+ *      distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ *   - xxHash homepage: https://www.xxhash.com
+ *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
 
 
 /*
diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index 69572117a69..fe3abc1b461 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -1,17 +1,39 @@
 /*
- *  xxHash - Fast Hash algorithm
- *  Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- *  You can contact the author at :
- *  - xxHash homepage: https://cyan4973.github.io/xxHash/
- *  - xxHash source repository : https://github.com/Cyan4973/xxHash
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-*/
-
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (C) 2012-2023 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other materials provided with the
+ *      distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ *   - xxHash homepage: https://www.xxhash.com
+ *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+/* Local adaptations for Zstandard */
 
 #ifndef XXH_NO_XXH3
 # define XXH_NO_XXH3
@@ -24,46 +46,210 @@
 /*!
  * @mainpage xxHash
  *
+ * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
+ * limits.
+ *
+ * It is proposed in four flavors, in three families:
+ * 1. @ref XXH32_family
+ *   - Classic 32-bit hash function. Simple, compact, and runs on almost all
+ *     32-bit and 64-bit systems.
+ * 2. @ref XXH64_family
+ *   - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
+ *     64-bit systems (but _not_ 32-bit systems).
+ * 3. @ref XXH3_family
+ *   - Modern 64-bit and 128-bit hash function family which features improved
+ *     strength and performance across the board, especially on smaller data.
+ *     It benefits greatly from SIMD and 64-bit without requiring it.
+ *
+ * Benchmarks
+ * ---
+ * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
+ * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
+ *
+ * | Hash Name            | ISA ext | Width | Large Data Speed | Small Data Velocity |
+ * | -------------------- | ------- | ----: | ---------------: | ------------------: |
+ * | XXH3_64bits()        | @b AVX2 |    64 |        59.4 GB/s |               133.1 |
+ * | MeowHash             | AES-NI  |   128 |        58.2 GB/s |                52.5 |
+ * | XXH3_128bits()       | @b AVX2 |   128 |        57.9 GB/s |               118.1 |
+ * | CLHash               | PCLMUL  |    64 |        37.1 GB/s |                58.1 |
+ * | XXH3_64bits()        | @b SSE2 |    64 |        31.5 GB/s |               133.1 |
+ * | XXH3_128bits()       | @b SSE2 |   128 |        29.6 GB/s |               118.1 |
+ * | RAM sequential read  |         |   N/A |        28.0 GB/s |                 N/A |
+ * | ahash                | AES-NI  |    64 |        22.5 GB/s |               107.2 |
+ * | City64               |         |    64 |        22.0 GB/s |                76.6 |
+ * | T1ha2                |         |    64 |        22.0 GB/s |                99.0 |
+ * | City128              |         |   128 |        21.7 GB/s |                57.7 |
+ * | FarmHash             | AES-NI  |    64 |        21.3 GB/s |                71.9 |
+ * | XXH64()              |         |    64 |        19.4 GB/s |                71.0 |
+ * | SpookyHash           |         |    64 |        19.3 GB/s |                53.2 |
+ * | Mum                  |         |    64 |        18.0 GB/s |                67.0 |
+ * | CRC32C               | SSE4.2  |    32 |        13.0 GB/s |                57.9 |
+ * | XXH32()              |         |    32 |         9.7 GB/s |                71.9 |
+ * | City32               |         |    32 |         9.1 GB/s |                66.0 |
+ * | Blake3*              | @b AVX2 |   256 |         4.4 GB/s |                 8.1 |
+ * | Murmur3              |         |    32 |         3.9 GB/s |                56.1 |
+ * | SipHash*             |         |    64 |         3.0 GB/s |                43.2 |
+ * | Blake3*              | @b SSE2 |   256 |         2.4 GB/s |                 8.1 |
+ * | HighwayHash          |         |    64 |         1.4 GB/s |                 6.0 |
+ * | FNV64                |         |    64 |         1.2 GB/s |                62.7 |
+ * | Blake2*              |         |   256 |         1.1 GB/s |                 5.1 |
+ * | SHA1*                |         |   160 |         0.8 GB/s |                 5.6 |
+ * | MD5*                 |         |   128 |         0.6 GB/s |                 7.8 |
+ * @note
+ *   - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
+ *     even though it is mandatory on x64.
+ *   - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
+ *     by modern standards.
+ *   - Small data velocity is a rough average of algorithm's efficiency for small
+ *     data. For more accurate information, see the wiki.
+ *   - More benchmarks and strength tests are found on the wiki:
+ *         https://github.com/Cyan4973/xxHash/wiki
+ *
+ * Usage
+ * ------
+ * All xxHash variants use a similar API. Changing the algorithm is a trivial
+ * substitution.
+ *
+ * @pre
+ *    For functions which take an input and length parameter, the following
+ *    requirements are assumed:
+ *    - The range from [`input`, `input + length`) is valid, readable memory.
+ *      - The only exception is if the `length` is `0`, `input` may be `NULL`.
+ *    - For C++, the objects must have the *TriviallyCopyable* property, as the
+ *      functions access bytes directly as if it was an array of `unsigned char`.
+ *
+ * @anchor single_shot_example
+ * **Single Shot**
+ *
+ * These functions are stateless functions which hash a contiguous block of memory,
+ * immediately returning the result. They are the easiest and usually the fastest
+ * option.
+ *
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
+ *
+ * @code{.c}
+ *   #include <string.h>
+ *   #include "xxhash.h"
+ *
+ *   // Example for a function which hashes a null terminated string with XXH32().
+ *   XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
+ *   {
+ *       // NULL pointers are only valid if the length is zero
+ *       size_t length = (string == NULL) ? 0 : strlen(string);
+ *       return XXH32(string, length, seed);
+ *   }
+ * @endcode
+ *
+ *
+ * @anchor streaming_example
+ * **Streaming**
+ *
+ * These groups of functions allow incremental hashing of unknown size, even
+ * more than what would fit in a size_t.
+ *
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
+ *
+ * @code{.c}
+ *   #include <stdio.h>
+ *   #include <assert.h>
+ *   #include "xxhash.h"
+ *   // Example for a function which hashes a FILE incrementally with XXH3_64bits().
+ *   XXH64_hash_t hashFile(FILE* f)
+ *   {
+ *       // Allocate a state struct. Do not just use malloc() or new.
+ *       XXH3_state_t* state = XXH3_createState();
+ *       assert(state != NULL && "Out of memory!");
+ *       // Reset the state to start a new hashing session.
+ *       XXH3_64bits_reset(state);
+ *       char buffer[4096];
+ *       size_t count;
+ *       // Read the file in chunks
+ *       while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
+ *           // Run update() as many times as necessary to process the data
+ *           XXH3_64bits_update(state, buffer, count);
+ *       }
+ *       // Retrieve the finalized hash. This will not change the state.
+ *       XXH64_hash_t result = XXH3_64bits_digest(state);
+ *       // Free the state. Do not use free().
+ *       XXH3_freeState(state);
+ *       return result;
+ *   }
+ * @endcode
+ *
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ *
+ * @anchor canonical_representation_example
+ * **Canonical Representation**
+ *
+ * The default return values from XXH functions are unsigned 32, 64 and 128 bit
+ * integers.
+ * This is the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ *
+ * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
+ * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
+ * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
+ *
+ * @code{.c}
+ *   #include <stdio.h>
+ *   #include "xxhash.h"
+ *
+ *   // Example for a function which prints XXH32_hash_t in human readable format
+ *   void printXxh32(XXH32_hash_t hash)
+ *   {
+ *       XXH32_canonical_t cano;
+ *       XXH32_canonicalFromHash(&cano, hash);
+ *       size_t i;
+ *       for(i = 0; i < sizeof(cano.digest); ++i) {
+ *           printf("%02x", cano.digest[i]);
+ *       }
+ *       printf("\n");
+ *   }
+ *
+ *   // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
+ *   XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
+ *   {
+ *       XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
+ *       return hash;
+ *   }
+ * @endcode
+ *
+ *
  * @file xxhash.h
  * xxHash prototypes and implementation
  */
-/* TODO: update */
-/* Notice extracted from xxHash homepage:
-
-xxHash is an extremely fast hash algorithm, running at RAM speed limits.
-It also successfully passes all tests from the SMHasher suite.
-
-Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
-
-Name            Speed       Q.Score   Author
-xxHash          5.4 GB/s     10
-CrapWow         3.2 GB/s      2       Andrew
-MurmurHash 3a   2.7 GB/s     10       Austin Appleby
-SpookyHash      2.0 GB/s     10       Bob Jenkins
-SBox            1.4 GB/s      9       Bret Mulvey
-Lookup3         1.2 GB/s      9       Bob Jenkins
-SuperFastHash   1.2 GB/s      1       Paul Hsieh
-CityHash64      1.05 GB/s    10       Pike & Alakuijala
-FNV             0.55 GB/s     5       Fowler, Noll, Vo
-CRC32           0.43 GB/s     9
-MD5-32          0.33 GB/s    10       Ronald L. Rivest
-SHA1-32         0.28 GB/s    10
-
-Q.Score is a measure of quality of the hash function.
-It depends on successfully passing SMHasher test set.
-10 is a perfect score.
-
-Note: SMHasher's CRC32 implementation is not the fastest one.
-Other speed-oriented implementations can be faster,
-especially in combination with PCLMUL instruction:
-https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
-
-A 64-bit version, named XXH64, is available since r35.
-It offers much better speed, but for 64-bit applications only.
-Name     Speed on 64 bits    Speed on 32 bits
-XXH64       13.8 GB/s            1.9 GB/s
-XXH32        6.8 GB/s            6.0 GB/s
-*/
 
 #if defined (__cplusplus)
 extern "C" {
@@ -73,21 +259,80 @@ extern "C" {
  *  INLINE mode
  ******************************/
 /*!
- * XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Gives access to internal state declaration, required for static allocation.
+ *
+ * Incompatible with dynamic linking, due to risks of ABI changes.
+ *
+ * Usage:
+ * @code{.c}
+ *     #define XXH_STATIC_LINKING_ONLY
+ *     #include "xxhash.h"
+ * @endcode
+ */
+#  define XXH_STATIC_LINKING_ONLY
+/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
+
+/*!
+ * @brief Gives access to internal definitions.
+ *
+ * Usage:
+ * @code{.c}
+ *     #define XXH_STATIC_LINKING_ONLY
+ *     #define XXH_IMPLEMENTATION
+ *     #include "xxhash.h"
+ * @endcode
+ */
+#  define XXH_IMPLEMENTATION
+/* Do not undef XXH_IMPLEMENTATION for Doxygen */
+
+/*!
+ * @brief Exposes the implementation and marks all functions as `inline`.
+ *
  * Use these build macros to inline xxhash into the target unit.
  * Inlining improves performance on small inputs, especially when the length is
  * expressed as a compile-time constant:
  *
- *      https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *  https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
  *
  * It also keeps xxHash symbols private to the unit, so they are not exported.
  *
  * Usage:
+ * @code{.c}
  *     #define XXH_INLINE_ALL
  *     #include "xxhash.h"
- *
+ * @endcode
  * Do not compile and link xxhash.o as a separate object, as it is not useful.
  */
+#  define XXH_INLINE_ALL
+#  undef XXH_INLINE_ALL
+/*!
+ * @brief Exposes the implementation without marking functions as inline.
+ */
+#  define XXH_PRIVATE_API
+#  undef XXH_PRIVATE_API
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
+ * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
+ * by this header.
+ */
+#  define XXH_NAMESPACE /* YOUR NAME HERE */
+#  undef XXH_NAMESPACE
+#endif
+
 #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
     && !defined(XXH_INLINE_ALL_31684351384)
    /* this section should be traversed only once */
@@ -202,21 +447,13 @@ extern "C" {
 #  undef XXHASH_H_STATIC_13879238742
 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
 
-
-
 /* ****************************************************************
  *  Stable API
  *****************************************************************/
 #ifndef XXHASH_H_5627135585666179
 #define XXHASH_H_5627135585666179 1
 
-
-/*!
- * @defgroup public Public API
- * Contains details on the public xxHash functions.
- * @{
- */
-/* specific declaration modes for Windows */
+/*! @brief Marks a global symbol. */
 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
 #  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
 #    ifdef XXH_EXPORT
@@ -229,24 +466,6 @@ extern "C" {
 #  endif
 #endif
 
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Emulate a namespace by transparently prefixing all symbols.
- *
- * If you want to include _and expose_ xxHash functions from within your own
- * library, but also want to avoid symbol collisions with other libraries which
- * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
- * any public symbol from xxhash library with the value of XXH_NAMESPACE
- * (therefore, avoid empty or numeric values).
- *
- * Note that no change is required within the calling program as long as it
- * includes `xxhash.h`: Regular symbol names will be automatically translated
- * by this header.
- */
-#  define XXH_NAMESPACE /* YOUR NAME HERE */
-#  undef XXH_NAMESPACE
-#endif
-
 #ifdef XXH_NAMESPACE
 #  define XXH_CAT(A,B) A##B
 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
@@ -306,12 +525,40 @@ extern "C" {
 #endif
 
 
+/* *************************************
+*  Compiler specifics
+***************************************/
+
+/* specific declaration modes for Windows */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+#    ifdef XXH_EXPORT
+#      define XXH_PUBLIC_API __declspec(dllexport)
+#    elif XXH_IMPORT
+#      define XXH_PUBLIC_API __declspec(dllimport)
+#    endif
+#  else
+#    define XXH_PUBLIC_API   /* do nothing */
+#  endif
+#endif
+
+#if defined (__GNUC__)
+# define XXH_CONSTF  __attribute__((const))
+# define XXH_PUREF   __attribute__((pure))
+# define XXH_MALLOCF __attribute__((malloc))
+#else
+# define XXH_CONSTF  /* disable */
+# define XXH_PUREF
+# define XXH_MALLOCF
+#endif
+
 /* *************************************
 *  Version
 ***************************************/
 #define XXH_VERSION_MAJOR    0
 #define XXH_VERSION_MINOR    8
-#define XXH_VERSION_RELEASE  1
+#define XXH_VERSION_RELEASE  2
+/*! @brief Version number, encoded as two digits each */
 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
 
 /*!
@@ -320,16 +567,22 @@ extern "C" {
  * This is mostly useful when xxHash is compiled as a shared library,
  * since the returned value comes from the library, as opposed to header file.
  *
- * @return `XXH_VERSION_NUMBER` of the invoked library.
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
  */
-XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
 
 
 /* ****************************
 *  Common basic types
 ******************************/
 #include <stddef.h>   /* size_t */
-typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+/*!
+ * @brief Exit code for the streaming API.
+ */
+typedef enum {
+    XXH_OK = 0, /*!< OK */
+    XXH_ERROR   /*!< Error */
+} XXH_errorcode;
 
 
 /*-**********************************************************************
@@ -353,37 +606,33 @@ typedef uint32_t XXH32_hash_t;
 #   include <limits.h>
 #   if UINT_MAX == 0xFFFFFFFFUL
       typedef unsigned int XXH32_hash_t;
+#   elif ULONG_MAX == 0xFFFFFFFFUL
+      typedef unsigned long XXH32_hash_t;
 #   else
-#     if ULONG_MAX == 0xFFFFFFFFUL
-        typedef unsigned long XXH32_hash_t;
-#     else
-#       error "unsupported platform: need a 32-bit type"
-#     endif
+#     error "unsupported platform: need a 32-bit type"
 #   endif
 #endif
 
 /*!
  * @}
  *
- * @defgroup xxh32_family XXH32 family
+ * @defgroup XXH32_family XXH32 family
  * @ingroup public
  * Contains functions used in the classic 32-bit xxHash algorithm.
  *
  * @note
  *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
- *   Note that @ref xxh3_family provides competitive speed
- *   for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
+ *   Note that the @ref XXH3_family provides competitive speed for both 32-bit
+ *   and 64-bit systems, and offers true 64/128 bit hash results.
  *
- * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
- * @see @ref xxh32_impl for implementation details
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
+ * @see @ref XXH32_impl for implementation details
  * @{
  */
 
 /*!
  * @brief Calculates the 32-bit hash of @p input using xxHash32.
  *
- * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
- *
  * @param input The block of data to be hashed, at least @p length bytes in size.
  * @param length The length of @p input, in bytes.
  * @param seed The 32-bit seed to alter the hash's output predictably.
@@ -393,87 +642,44 @@ typedef uint32_t XXH32_hash_t;
  *   readable, contiguous memory. However, if @p length is `0`, @p input may be
  *   `NULL`. In C++, this also must be *TriviallyCopyable*.
  *
- * @return The calculated 32-bit hash value.
- *
- * @see
- *    XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- *    Direct equivalents for the other variants of xxHash.
- * @see
- *    XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
- */
-XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
-
-/*!
- * Streaming functions generate the xxHash value from an incremental input.
- * This method is slower than single-call functions, due to state management.
- * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
- *
- * An XXH state must first be allocated using `XXH*_createState()`.
- *
- * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
- *
- * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
- *
- * The function returns an error code, with 0 meaning OK, and any other value
- * meaning there is an error.
- *
- * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
- * This function returns the nn-bits hash as an int or long long.
- *
- * It's still possible to continue inserting input into the hash state after a
- * digest, and generate new hash values later on by invoking `XXH*_digest()`.
- *
- * When done, release the state using `XXH*_freeState()`.
- *
- * Example code for incrementally hashing a file:
- * @code{.c}
- *    #include 
- *    #include 
- *    #define BUFFER_SIZE 256
+ * @return The calculated 32-bit xxHash32 value.
  *
- *    // Note: XXH64 and XXH3 use the same interface.
- *    XXH32_hash_t
- *    hashFile(FILE* stream)
- *    {
- *        XXH32_state_t* state;
- *        unsigned char buf[BUFFER_SIZE];
- *        size_t amt;
- *        XXH32_hash_t hash;
- *
- *        state = XXH32_createState();       // Create a state
- *        assert(state != NULL);             // Error check here
- *        XXH32_reset(state, 0xbaad5eed);    // Reset state with our seed
- *        while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
- *            XXH32_update(state, buf, amt); // Hash the file in chunks
- *        }
- *        hash = XXH32_digest(state);        // Finalize the hash
- *        XXH32_freeState(state);            // Clean up
- *        return hash;
- *    }
- * @endcode
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
 
+#ifndef XXH_NO_STREAM
 /*!
  * @typedef struct XXH32_state_s XXH32_state_t
  * @brief The opaque state struct for the XXH32 streaming API.
  *
  * @see XXH32_state_s for details.
+ * @see @ref streaming_example "Streaming Example"
  */
 typedef struct XXH32_state_s XXH32_state_t;
 
 /*!
  * @brief Allocates an @ref XXH32_state_t.
  *
- * Must be freed with XXH32_freeState().
- * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ * @return An allocated pointer of @ref XXH32_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH32_freeState().
+ *
+ * @see @ref streaming_example "Streaming Example"
  */
-XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
 /*!
  * @brief Frees an @ref XXH32_state_t.
  *
- * Must be allocated with XXH32_createState().
  * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
- * @return XXH_OK.
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH32_createState().
+ *
+ * @see @ref streaming_example "Streaming Example"
+ *
  */
 XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
 /*!
@@ -489,23 +695,24 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_
 /*!
  * @brief Resets an @ref XXH32_state_t to begin a new hash.
  *
- * This function resets and seeds a state. Call it before @ref XXH32_update().
- *
  * @param statePtr The state struct to reset.
  * @param seed The 32-bit seed to alter the hash result predictably.
  *
  * @pre
  *   @p statePtr must not be `NULL`.
  *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH32_update().
+ *
+ * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
 
 /*!
  * @brief Consumes a block of @p input to an @ref XXH32_state_t.
  *
- * Call this to incrementally consume blocks of data.
- *
  * @param statePtr The state struct to update.
  * @param input The block of data to be hashed, at least @p length bytes in size.
  * @param length The length of @p input, in bytes.
@@ -517,46 +724,35 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t
  *   readable, contiguous memory. However, if @p length is `0`, @p input may be
  *   `NULL`. In C++, this also must be *TriviallyCopyable*.
  *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ *
+ * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
 
 /*!
  * @brief Returns the calculated hash value from an @ref XXH32_state_t.
  *
- * @note
- *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
- *   digest, and update again.
- *
  * @param statePtr The state struct to calculate the hash from.
  *
  * @pre
  *  @p statePtr must not be `NULL`.
  *
- * @return The calculated xxHash32 value from that state.
- */
-XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
-
-/*******   Canonical representation   *******/
-
-/*
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- * This the simplest and fastest format for further post-processing.
- *
- * However, this leaves open the question of what is the order on the byte level,
- * since little and big endian conventions will store the same number differently.
- *
- * The canonical representation settles this issue by mandating big-endian
- * convention, the same convention as human-readable numbers (large digits first).
+ * @return The calculated 32-bit xxHash32 value from that state.
  *
- * When writing hash values to storage, sending them over a network, or printing
- * them, it's highly recommended to use the canonical representation to ensure
- * portability across a wider range of systems, present and future.
+ * @note
+ *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
  *
- * The following functions allow transformation of hash values to and from
- * canonical format.
+ * @see @ref streaming_example "Streaming Example"
  */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/*******   Canonical representation   *******/
 
 /*!
  * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
@@ -568,11 +764,13 @@ typedef struct {
 /*!
  * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
  *
- * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param dst  The @ref XXH32_canonical_t pointer to be stored to.
  * @param hash The @ref XXH32_hash_t to be converted.
  *
  * @pre
  *   @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
  */
 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
 
@@ -585,44 +783,75 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
  *   @p src must not be `NULL`.
  *
  * @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
  */
-XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 
 
+/*! @cond Doxygen ignores this part */
 #ifdef __has_attribute
 # define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
 #else
 # define XXH_HAS_ATTRIBUTE(x) 0
 #endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * C23 __STDC_VERSION__ number hasn't been specified yet. For now
+ * leave as `201711L` (C17 + 1).
+ * TODO: Update to correct value when it's been specified.
+ */
+#define XXH_C23_VN 201711L
+/*! @endcond */
 
+/*! @cond Doxygen ignores this part */
 /* C-language Attributes are added in C23. */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
 # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
 #else
 # define XXH_HAS_C_ATTRIBUTE(x) 0
 #endif
+/*! @endcond */
 
+/*! @cond Doxygen ignores this part */
 #if defined(__cplusplus) && defined(__has_cpp_attribute)
 # define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
 #else
 # define XXH_HAS_CPP_ATTRIBUTE(x) 0
 #endif
+/*! @endcond */
 
+/*! @cond Doxygen ignores this part */
 /*
-Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
-introduced in CPP17 and C23.
-CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
-C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
-*/
-#if XXH_HAS_C_ATTRIBUTE(x)
-# define XXH_FALLTHROUGH [[fallthrough]]
-#elif XXH_HAS_CPP_ATTRIBUTE(x)
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
+ * introduced in CPP17 and C23.
+ * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
 # define XXH_FALLTHROUGH [[fallthrough]]
 #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
-# define XXH_FALLTHROUGH __attribute__ ((fallthrough))
+# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
+#else
+# define XXH_FALLTHROUGH /* fallthrough */
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_NOESCAPE for annotated pointers in public API.
+ * https://clang.llvm.org/docs/AttributeReference.html#noescape
+ * As of writing this, only supported by clang.
+ */
+#if XXH_HAS_ATTRIBUTE(noescape)
+# define XXH_NOESCAPE __attribute__((noescape))
 #else
-# define XXH_FALLTHROUGH
+# define XXH_NOESCAPE
 #endif
+/*! @endcond */
+
 
 /*!
  * @}
@@ -660,7 +889,7 @@ typedef uint64_t XXH64_hash_t;
 /*!
  * @}
  *
- * @defgroup xxh64_family XXH64 family
+ * @defgroup XXH64_family XXH64 family
  * @ingroup public
  * @{
  * Contains functions used in the classic 64-bit xxHash algorithm.
@@ -671,13 +900,9 @@ typedef uint64_t XXH64_hash_t;
  *   It provides better speed for systems with vector processing capabilities.
  */
 
-
 /*!
  * @brief Calculates the 64-bit hash of @p input using xxHash64.
  *
- * This function usually runs faster on 64-bit systems, but slower on 32-bit
- * systems (see benchmark).
- *
  * @param input The block of data to be hashed, at least @p length bytes in size.
  * @param length The length of @p input, in bytes.
  * @param seed The 64-bit seed to alter the hash's output predictably.
@@ -687,41 +912,156 @@ typedef uint64_t XXH64_hash_t;
  *   readable, contiguous memory. However, if @p length is `0`, @p input may be
  *   `NULL`. In C++, this also must be *TriviallyCopyable*.
  *
- * @return The calculated 64-bit hash.
+ * @return The calculated 64-bit xxHash64 value.
  *
- * @see
- *    XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- *    Direct equivalents for the other variants of xxHash.
- * @see
- *    XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*!
  * @brief The opaque state struct for the XXH64 streaming API.
  *
  * @see XXH64_state_s for details.
+ * @see @ref streaming_example "Streaming Example"
  */
 typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
-XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
-XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
-
-XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
-
-/*******   Canonical representation   *******/
-typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
 
-#ifndef XXH_NO_XXH3
 /*!
- * @}
- * ************************************************************************
- * @defgroup xxh3_family XXH3 family
+ * @brief Allocates an @ref XXH64_state_t.
+ *
+ * @return An allocated pointer of @ref XXH64_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH64_freeState().
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
+
+/*!
+ * @brief Frees an @ref XXH64_state_t.
+ *
+ * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH64_createState().
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+/*!
+ * @brief Copies one @ref XXH64_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH64_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH64_update().
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH64_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH64_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *  @p statePtr must not be `NULL`.
+ *
+ * @return The calculated 64-bit xxHash64 value from that state.
+ *
+ * @note
+ *   Calling XXH64_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+/*******   Canonical representation   *******/
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH64_hash_t.
+ */
+typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.
+ *
+ * @param dst The @ref XXH64_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH64_hash_t to be converted.
+ *
+ * @pre
+ *   @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.
+ *
+ * @param src The @ref XXH64_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
+
+#ifndef XXH_NO_XXH3
+
+/*!
+ * @}
+ * ************************************************************************
+ * @defgroup XXH3_family XXH3 family
  * @ingroup public
  * @{
  *
@@ -741,16 +1081,26 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
  *
  * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
  * but does not require it.
- * Any 32-bit and 64-bit targets that can run XXH32 smoothly
- * can run XXH3 at competitive speeds, even without vector support.
- * Further details are explained in the implementation.
- *
- * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
- * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
+ * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
+ * at competitive speeds, even without vector support. Further details are
+ * explained in the implementation.
+ *
+ * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD
+ * implementations for many common platforms:
+ *   - AVX512
+ *   - AVX2
+ *   - SSE2
+ *   - ARM NEON
+ *   - WebAssembly SIMD128
+ *   - POWER8 VSX
+ *   - s390x ZVector
+ * This can be controlled via the @ref XXH_VECTOR macro, but it automatically
+ * selects the best version according to predefined macros. For the x86 family, an
+ * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
  *
  * XXH3 implementation is portable:
  * it has a generic C90 formulation that can be compiled on any platform,
- * all implementations generage exactly the same hash value on all platforms.
+ * all implementations generate exactly the same hash value on all platforms.
  * Starting from v0.8.0, it's also labelled "stable", meaning that
  * any future version will also generate the same hash value.
  *
@@ -762,24 +1112,59 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
  *
  * The API supports one-shot hashing, streaming mode, and custom secrets.
  */
-
 /*-**********************************************************************
 *  XXH3 64-bit variant
 ************************************************************************/
 
-/* XXH3_64bits():
- * default 64-bit variant, using default secret and default seed of 0.
- * It's the fastest variant. */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
+/*!
+ * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input.
+ *
+ * @param input  The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ *   This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however
+ *   it may have slightly better performance due to constant propagation of the
+ *   defaults.
+ *
+ * @see
+ *    XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
 
-/*
- * XXH3_64bits_withSeed():
- * This variant generates a custom secret on the fly
- * based on default secret altered using the `seed` value.
+/*!
+ * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.
+ *
+ * @param input  The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed   The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ *    seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
  * While this operation is decently fast, note that it's not completely free.
- * Note: seed==0 produces the same results as XXH3_64bits().
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
 
 /*!
  * The bare minimum size for a custom secret.
@@ -790,27 +1175,43 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
  */
 #define XXH3_SECRET_SIZE_MIN 136
 
-/*
- * XXH3_64bits_withSecret():
+/*!
+ * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data       The block of data to be hashed, at least @p len bytes in size.
+ * @param len        The length of @p data, in bytes.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @pre
+ *   The memory between @p data and @p data + @p len must be valid,
+ *   readable, contiguous memory. However, if @p len is `0`, @p data may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
  * It's possible to provide any blob of bytes as a "secret" to generate the hash.
  * This makes it more difficult for an external actor to prepare an intentional collision.
- * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
  * However, the quality of the secret impacts the dispersion of the hash algorithm.
  * Therefore, the secret _must_ look like a bunch of random bytes.
  * Avoid "trivial" or structured data such as repeated sequences or a text document.
  * Whenever in doubt about the "randomness" of the blob of bytes,
- * consider employing "XXH3_generateSecret()" instead (see below).
+ * consider employing @ref XXH3_generateSecret() instead (see below).
  * It will generate a proper high entropy secret derived from the blob of bytes.
  * Another advantage of using XXH3_generateSecret() is that
  * it guarantees that all bits within the initial blob of bytes
  * will impact every bit of the output.
  * This is not necessarily the case when using the blob of bytes directly
  * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
 
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*
  * Streaming requires state maintenance.
  * This operation costs memory and CPU.
@@ -819,40 +1220,135 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len,
  */
 
 /*!
- * @brief The state struct for the XXH3 streaming API.
+ * @brief The opaque state struct for the XXH3 streaming API.
  *
  * @see XXH3_state_s for details.
+ * @see @ref streaming_example "Streaming Example"
  */
 typedef struct XXH3_state_s XXH3_state_t;
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
-XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
 
-/*
- * XXH3_64bits_reset():
- * Initialize with default parameters.
- * digest will be equivalent to `XXH3_64bits()`.
+/*!
+ * @brief Copies one @ref XXH3_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
-/*
- * XXH3_64bits_reset_withSeed():
- * Generate a custom secret from `seed`, and store it into `statePtr`.
- * digest will be equivalent to `XXH3_64bits_withSeed()`.
+XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generates a secret with default parameters.
+ *   - Call this function before @ref XXH3_64bits_update().
+ *   - Digest will be equivalent to `XXH3_64bits()`.
+ *
+ * @see @ref streaming_example "Streaming Example"
+ *
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-/*
- * XXH3_64bits_reset_withSecret():
- * `secret` is referenced, it _must outlive_ the hash streaming session.
- * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed     The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generates a secret from `seed`.
+ *   - Call this function before @ref XXH3_64bits_update().
+ *   - Digest will be equivalent to `XXH3_64bits_withSeed()`.
+ *
+ * @see @ref streaming_example "Streaming Example"
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   `secret` is referenced, it _must outlive_ the hash streaming session.
+ *
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
  * and the quality of produced hash values depends on secret's entropy
  * (secret's content should look like a bunch of random bytes).
  * When in doubt about the randomness of a candidate `secret`,
  * consider employing `XXH3_generateSecret()` instead (see below).
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ *
+ * @see @ref streaming_example "Streaming Example"
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
 
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
+/*!
+ * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *  @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 64-bit hash value from that state.
+ *
+ * @note
+ *   Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t  XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 
 /* note : canonical representation of XXH3 is the same as XXH64
  * since they both produce XXH64_hash_t values */
@@ -873,11 +1369,76 @@ typedef struct {
     XXH64_hash_t high64;  /*!< `value >> 64` */
 } XXH128_hash_t;
 
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+/*!
+ * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len  The length of @p data, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
+ * for shorter inputs.
+ *
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
+/*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len  The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * @note
+ *    seed == 0 produces the same results as @ref XXH3_128bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+/*!
+ * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data       The block of data to be hashed, at least @p len bytes in size.
+ * @param len        The length of @p data, in bytes.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing @ref XXH3_generateSecret() instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*
  * Streaming requires state maintenance.
  * This operation costs memory and CPU.
@@ -887,42 +1448,172 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t le
  * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
  * Use already declared XXH3_createState() and XXH3_freeState().
  *
- * All reset and streaming functions have same meaning as their 64-bit counterpart.
+ * All reset and streaming functions have same meaning as their 64-bit counterpart.
+ */
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generates a secret with default parameters.
+ *   - Call it before @ref XXH3_128bits_update().
+ *   - Digest will be equivalent to `XXH3_128bits()`.
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed     The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generates a secret from `seed`.
+ *   - Call it before @ref XXH3_128bits_update().
+ *   - Digest will be equivalent to `XXH3_128bits_withSeed()`.
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr   The state struct to reset.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
  */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
 
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
-
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
+/*!
+ * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *  @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 128-bit hash value from that state.
+ *
+ * @note
+ *   Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ *
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 
 /* Following helper functions make it possible to compare XXH128_hast_t values.
  * Since XXH128_hash_t is a structure, this capability is not offered by the language.
  * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
 
 /*!
- * XXH128_isEqual():
- * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
+ * @brief Check equality of two XXH128_hash_t values
+ *
+ * @param h1 The 128-bit hash value.
+ * @param h2 Another 128-bit hash value.
+ *
+ * @return `1` if `h1` and `h2` are equal.
+ * @return `0` if they are not.
  */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
 
 /*!
- * XXH128_cmp():
+ * @brief Compares two @ref XXH128_hash_t
  *
  * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
  *
- * return: >0 if *h128_1  > *h128_2
- *         =0 if *h128_1 == *h128_2
- *         <0 if *h128_1  < *h128_2
+ * @param h128_1 Left-hand side value
+ * @param h128_2 Right-hand side value
+ *
+ * @return >0 if @p h128_1  > @p h128_2
+ * @return =0 if @p h128_1 == @p h128_2
+ * @return <0 if @p h128_1  < @p h128_2
  */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
+XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
 
 
 /*******   Canonical representation   *******/
 typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
-XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
-XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
+
+
+/*!
+ * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
+ *
+ * @param dst  The @ref XXH128_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH128_hash_t to be converted.
+ *
+ * @pre
+ *   @p dst must not be `NULL`.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.
+ *
+ * @param src The @ref XXH128_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
 
 
 #endif  /* !XXH_NO_XXH3 */
@@ -996,7 +1687,6 @@ struct XXH64_state_s {
    XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
 };   /* typedef'd to XXH64_state_t */
 
-
 #ifndef XXH_NO_XXH3
 
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
@@ -1032,6 +1722,7 @@ struct XXH64_state_s {
 #define XXH3_INTERNALBUFFER_SIZE 256
 
 /*!
+ * @internal
  * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
  *
  * This is the size used in @ref XXH3_kSecret and the seeded functions.
@@ -1064,7 +1755,7 @@ struct XXH64_state_s {
  */
 struct XXH3_state_s {
    XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
-       /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref XXH64_state_s */
+       /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
    XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
        /*!< Used to store a custom secret generated from a seed. */
    XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
@@ -1104,69 +1795,148 @@ struct XXH3_state_s {
  * Note that this doesn't prepare the state for a streaming operation,
  * it's still necessary to use XXH3_NNbits_reset*() afterwards.
  */
-#define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
+#define XXH3_INITSTATE(XXH3_state_ptr)                       \
+    do {                                                     \
+        XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
+        tmp_xxh3_state_ptr->seed = 0;                        \
+        tmp_xxh3_state_ptr->extSecret = NULL;                \
+    } while(0)
 
 
-/* XXH128() :
- * simple alias to pre-selected XXH3_128bits variant
+/*!
+ * @brief Calculates the 128-bit hash of @p data using XXH3.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len  The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ *   The memory between @p data and @p data + @p len must be valid,
+ *   readable, contiguous memory. However, if @p len is `0`, @p data may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 128-bit XXH3 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
-XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
 
 
 /* ===   Experimental API   === */
 /* Symbols defined below must be considered tied to a specific library version. */
 
-/*
- * XXH3_generateSecret():
+/*!
+ * @brief Derive a high-entropy secret from any user-defined content, named customSeed.
+ *
+ * @param secretBuffer    A writable buffer for derived high-entropy secret data.
+ * @param secretSize      Size of secretBuffer, in bytes.  Must be >= @ref XXH3_SECRET_SIZE_MIN.
+ * @param customSeed      A user-defined content.
+ * @param customSeedSize  Size of customSeed, in bytes.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
  *
- * Derive a high-entropy secret from any user-defined content, named customSeed.
  * The generated secret can be used in combination with `*_withSecret()` functions.
- * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,
- * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
+ * The `_withSecret()` variants are useful to provide a higher level of protection
+ * than 64-bit seed, as it becomes much more difficult for an external actor to
+ * guess how to impact the calculation logic.
  *
  * The function accepts as input a custom seed of any length and any content,
- * and derives from it a high-entropy secret of length @secretSize
- * into an already allocated buffer @secretBuffer.
- * @secretSize must be >= XXH3_SECRET_SIZE_MIN
+ * and derives from it a high-entropy secret of length @p secretSize into an
+ * already allocated buffer @p secretBuffer.
  *
  * The generated secret can then be used with any `*_withSecret()` variant.
- * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
- * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
  * are part of this list. They all accept a `secret` parameter
- * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
  * _and_ feature very high entropy (consist of random-looking bytes).
- * These conditions can be a high bar to meet, so
- * XXH3_generateSecret() can be employed to ensure proper quality.
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
+ * be employed to ensure proper quality.
+ *
+ * @p customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch
+ * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
  *
- * customSeed can be anything. It can have any size, even small ones,
- * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
- * The resulting `secret` will nonetheless provide all required qualities.
+ * @pre
+ *   - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
+ *   - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
  *
- * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ * Example code:
+ * @code{.c}
+ *    #include <stdio.h>
+ *    #include <stdlib.h>
+ *    #include <string.h>
+ *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ *    #include "xxhash.h"
+ *    // Hashes argv[2] using the entropy from argv[1].
+ *    int main(int argc, char* argv[])
+ *    {
+ *        char secret[XXH3_SECRET_SIZE_MIN];
+ *        if (argc != 3) { return 1; }
+ *        XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
+ *        XXH64_hash_t h = XXH3_64bits_withSecret(
+ *             argv[2], strlen(argv[2]),
+ *             secret, sizeof(secret)
+ *        );
+ *        printf("%016llx\n", (unsigned long long) h);
+ *    }
+ * @endcode
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
 
-
-/*
- * XXH3_generateSecret_fromSeed():
- *
- * Generate the same secret as the _withSeed() variants.
+/*!
+ * @brief Generate the same secret as the _withSeed() variants.
  *
- * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
- * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
+ * @param seed         The 64-bit seed to alter the hash result predictably.
  *
  * The generated secret can be used in combination with
  *`*_withSecret()` and `_withSecretandSeed()` variants.
- * This generator is notably useful in combination with `_withSecretandSeed()`,
- * as a way to emulate a faster `_withSeed()` variant.
+ *
+ * Example C++ `std::string` hash class:
+ * @code{.cpp}
+ *    #include <string>
+ *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ *    #include "xxhash.h"
+ *    // Slow, seeds each time
+ *    class HashSlow {
+ *        XXH64_hash_t seed;
+ *    public:
+ *        HashSlow(XXH64_hash_t s) : seed{s} {}
+ *        size_t operator()(const std::string& x) const {
+ *            return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
+ *        }
+ *    };
+ *    // Fast, caches the seeded secret for future uses.
+ *    class HashFast {
+ *        unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
+ *    public:
+ *        HashFast(XXH64_hash_t s) {
+ *            XXH3_generateSecret_fromSeed(secret, s);
+ *        }
+ *        size_t operator()(const std::string& x) const {
+ *            return size_t{
+ *                XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
+ *            };
+ *        }
+ *    };
+ * @endcode
  */
-XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
 
-/*
- * *_withSecretandSeed() :
+/*!
+ * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data       The block of data to be hashed, at least @p len bytes in size.
+ * @param len        The length of @p data, in bytes.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed       The 64-bit seed to alter the hash result predictably.
+ *
  * These variants generate hash values using either
- * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
- * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ * @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
  *
  * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
  * `_withSeed()` has to generate the secret on the fly for "large" keys.
@@ -1175,7 +1945,7 @@ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_
  * which requires more instructions than _withSeed() variants.
  * Therefore, _withSecretandSeed variant combines the best of both worlds.
  *
- * When @secret has been generated by XXH3_generateSecret_fromSeed(),
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
  * this variant produces *exactly* the same results as `_withSeed()` variant,
  * hence offering only a pure speed benefit on "large" input,
  * by skipping the need to regenerate the secret for every large input.
@@ -1184,33 +1954,71 @@ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_
  * for example with XXH3_64bits(), which then becomes the seed,
  * and then employ both the seed and the secret in _withSecretandSeed().
  * On top of speed, an added benefit is that each bit in the secret
- * has a 50% chance to swap each bit in the output,
- * via its impact to the seed.
+ * has a 50% chance to swap each bit in the output, via its impact to the seed.
+ *
  * This is not guaranteed when using the secret directly in "small data" scenarios,
  * because only portions of the secret are employed for small data.
  */
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* data, size_t len,
-                              const void* secret, size_t secretSize,
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
+                              XXH_NOESCAPE const void* secret, size_t secretSize,
                               XXH64_hash_t seed);
-
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* data, size_t len,
-                               const void* secret, size_t secretSize,
+/*!
+ * @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param input      The block of data to be hashed, at least @p length bytes in size.
+ * @param length     The length of @p input, in bytes.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64     The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit XXH3 hash of @p input, derived from @p seed64
+ *         or @p secret as described for XXH3_64bits_withSecretandSeed().
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
+                               XXH_NOESCAPE const void* secret, size_t secretSize,
                                XXH64_hash_t seed64);
-
+#ifndef XXH_NO_STREAM
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr   A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64     The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
-                                    const void* secret, size_t secretSize,
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                    XXH_NOESCAPE const void* secret, size_t secretSize,
                                     XXH64_hash_t seed64);
-
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr   A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64     The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
-                                     const void* secret, size_t secretSize,
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                     XXH_NOESCAPE const void* secret, size_t secretSize,
                                      XXH64_hash_t seed64);
+#endif /* !XXH_NO_STREAM */
 
-
-#endif  /* XXH_NO_XXH3 */
+#endif  /* !XXH_NO_XXH3 */
 #endif  /* XXH_NO_LONG_LONG */
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  define XXH_IMPLEMENTATION
@@ -1264,7 +2072,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 /*!
  * @brief Define this to disable 64-bit code.
  *
- * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
+ * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
  */
 #  define XXH_NO_LONG_LONG
 #  undef XXH_NO_LONG_LONG /* don't actually */
@@ -1287,7 +2095,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
  *     eliminate the function call and treat it as an unaligned access.
  *
- *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
+ *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
  *   @par
  *     Depends on compiler extensions and is therefore not portable.
  *     This method is safe _if_ your compiler supports it,
@@ -1307,7 +2115,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *     inline small `memcpy()` calls, and it might also be faster on big-endian
  *     systems which lack a native byteswap instruction. However, some compilers
  *     will emit literal byteshifts even if the target supports unaligned access.
- *  .
+ *
  *
  * @warning
  *   Methods 1 and 2 rely on implementation-defined behavior. Use these with
@@ -1320,6 +2128,34 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  */
 #  define XXH_FORCE_MEMORY_ACCESS 0
 
+/*!
+ * @def XXH_SIZE_OPT
+ * @brief Controls how much xxHash optimizes for size.
+ *
+ * xxHash, when compiled, tends to result in a rather large binary size. This
+ * is mostly due to heavy usage of forced inlining and constant folding of the
+ * @ref XXH3_family to increase performance.
+ *
+ * However, some developers prefer size over speed. This option can
+ * significantly reduce the size of the generated code. When using the `-Os`
+ * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
+ * otherwise it is defined to 0.
+ *
+ * Most of these size optimizations can be controlled manually.
+ *
+ * This is a number from 0-2.
+ *  - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
+ *    comes first.
+ *  - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
+ *    conservative and disables hacks that increase code size. It implies the
+ *    options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
+ *    and @ref XXH3_NEON_LANES == 8 if they are not already defined.
+ *  - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
+ *    Performance may cry. For example, the single shot functions just use the
+ *    streaming API.
+ */
+#  define XXH_SIZE_OPT 0
+
 /*!
  * @def XXH_FORCE_ALIGN_CHECK
  * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
@@ -1341,9 +2177,11 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *
  * In these cases, the alignment check can be removed by setting this macro to 0.
  * Then the code will always use unaligned memory access.
- * Align check is automatically disabled on x86, x64 & arm64,
+ * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
  * which are platforms known to offer good unaligned memory accesses performance.
  *
+ * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
+ *
  * This option does not affect XXH3 (only XXH32 and XXH64).
  */
 #  define XXH_FORCE_ALIGN_CHECK 0
@@ -1365,11 +2203,28 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
  * compiler full control on whether to inline or not.
  *
- * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
- * -fno-inline with GCC or Clang, this will automatically be defined.
+ * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
+ * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
  */
 #  define XXH_NO_INLINE_HINTS 0
 
+/*!
+ * @def XXH3_INLINE_SECRET
+ * @brief Determines whether to inline the XXH3 withSecret code.
+ *
+ * When the secret size is known, the compiler can improve the performance
+ * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
+ *
+ * However, if the secret size is not known, it doesn't have any benefit. This
+ * happens when xxHash is compiled into a global symbol. Therefore, if
+ * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
+ *
+ * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
+ * that are *sometimes* force inline on -Og, and it is impossible to automatically
+ * detect this optimization level.
+ */
+#  define XXH3_INLINE_SECRET 0
+
 /*!
  * @def XXH32_ENDJMP
  * @brief Whether to use a jump for `XXH32_finalize`.
@@ -1391,34 +2246,45 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  */
 #  define XXH_OLD_NAMES
 #  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+
+/*!
+ * @def XXH_NO_STREAM
+ * @brief Disables the streaming API.
+ *
+ * When xxHash is not inlined and the streaming functions are not used, disabling
+ * the streaming functions can improve code size significantly, especially with
+ * the @ref XXH3_family which tends to make constant folded copies of itself.
+ */
+#  define XXH_NO_STREAM
+#  undef XXH_NO_STREAM /* don't actually */
 #endif /* XXH_DOXYGEN */
 /*!
  * @}
  */
 
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-   /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
-#  if !defined(__clang__) && \
-( \
-    (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
-    ( \
-        defined(__GNUC__) && ( \
-            (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
-            ( \
-                defined(__mips__) && \
-                (__mips <= 5 || __mips_isa_rev < 6) && \
-                (!defined(__mips16) || defined(__mips_mips16e2)) \
-            ) \
-        ) \
-    ) \
-)
+   /* Prefer __attribute__((aligned(1))) accesses (method 1) on GCC-compatible compilers.
+    * Exception: < ARMv7 with unaligned access (e.g. Raspbian armhf) still gets byte shifting
+    * from method 1, so there we use memcpy, which for some reason does unaligned loads. */
+#  if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
 
+#ifndef XXH_SIZE_OPT
+   /* default to 1 for -Os or -Oz */
+#  if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
+#    define XXH_SIZE_OPT 1
+#  else
+#    define XXH_SIZE_OPT 0
+#  endif
+#endif
+
 #ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
-#  if defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) \
-   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64) /* visual */
+   /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
+#  if XXH_SIZE_OPT >= 1 || \
+      defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
+   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64)    || defined(_M_ARM) /* visual */
 #    define XXH_FORCE_ALIGN_CHECK 0
 #  else
 #    define XXH_FORCE_ALIGN_CHECK 1
@@ -1426,14 +2292,22 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 #endif
 
 #ifndef XXH_NO_INLINE_HINTS
-#  if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
-   || defined(__NO_INLINE__)     /* -O0, -fno-inline */
+#  if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__)  /* -O0, -fno-inline */
 #    define XXH_NO_INLINE_HINTS 1
 #  else
 #    define XXH_NO_INLINE_HINTS 0
 #  endif
 #endif
 
+#ifndef XXH3_INLINE_SECRET
+#  if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
+     || !defined(XXH_INLINE_ALL)
+#    define XXH3_INLINE_SECRET 0
+#  else
+#    define XXH3_INLINE_SECRET 1
+#  endif
+#endif
+
 #ifndef XXH32_ENDJMP
 /* generally preferable for performance */
 #  define XXH32_ENDJMP 0
@@ -1448,13 +2322,56 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 /* *************************************
 *  Includes & Memory related functions
 ***************************************/
-/* Modify the local functions below should you wish to use some other memory routines */
-/* for ZSTD_malloc(), ZSTD_free() */
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"  /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
-static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
-static void  XXH_free  (void* p)  { ZSTD_free(p); }
-static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
+#if defined(XXH_NO_STREAM)
+/* nothing */
+#elif defined(XXH_NO_STDLIB)
+
+/* When requesting to disable any mention of stdlib,
+ * the library loses the ability to invoke malloc / free.
+ * In practice, it means that functions like `XXH*_createState()`
+ * will always fail, and return NULL.
+ * This flag is useful in situations where
+ * xxhash.h is integrated into some kernel, embedded or limited environment
+ * without access to dynamic allocation.
+ */
+
+static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
+static void XXH_free(void* p) { (void)p; }
+
+#else
+
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than malloc().
+ */
+static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than free().
+ */
+static void XXH_free(void* p) { free(p); }
+
+#endif  /* XXH_NO_STDLIB */
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than memcpy().
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    return memcpy(dest,src,size);
+}
+
+#include <limits.h>   /* ULLONG_MAX */
 
 
 /* *************************************
@@ -1487,6 +2404,11 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
 #  define XXH_NO_INLINE static
 #endif
 
+#if XXH3_INLINE_SECRET
+#  define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
+#else
+#  define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
+#endif
 
 
 /* *************************************
@@ -1512,14 +2434,17 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
 #  include <assert.h>   /* note: can still be disabled with NDEBUG */
 #  define XXH_ASSERT(c)   assert(c)
 #else
-#  define XXH_ASSERT(c)   ((void)0)
+#  if defined(__INTEL_COMPILER)
+#    define XXH_ASSERT(c)   XXH_ASSUME((unsigned char) (c))
+#  else
+#    define XXH_ASSERT(c)   XXH_ASSUME(c)
+#  endif
 #endif
 
 /* note: use after variable declarations */
 #ifndef XXH_STATIC_ASSERT
 #  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
-#    include <assert.h>
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
 #  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
 #    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
 #  else
@@ -1534,7 +2459,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
  * @brief Used to prevent unwanted optimizations for @p var.
  *
  * It uses an empty GCC inline assembly statement with a register constraint
- * which forces @p var into a general purpose register (e.g. eax, ebx, ecx
+ * which forces @p var into a general purpose register (eg eax, ebx, ecx
  * on x86) and marks it as modified.
  *
  * This is used in a few places to avoid unwanted autovectorization (e.g.
@@ -1545,11 +2470,19 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
  * XXH3_initCustomSecret_scalar().
  */
 #if defined(__GNUC__) || defined(__clang__)
-#  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
+#  define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
 #else
 #  define XXH_COMPILER_GUARD(var) ((void)0)
 #endif
 
+/* Specifically for NEON vectors which use the "w" constraint, on
+ * Clang. */
+#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
+#  define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
+#else
+#  define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
+#endif
+
 /* *************************************
 *  Basic Types
 ***************************************/
@@ -1564,6 +2497,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
 typedef XXH32_hash_t xxh_u32;
 
 #ifdef XXH_OLD_NAMES
+#  warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
 #  define BYTE xxh_u8
 #  define U8   xxh_u8
 #  define U32  xxh_u32
@@ -1637,18 +2571,19 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
- * __pack instructions are safer but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
  */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
 #endif
 static xxh_u32 XXH_read32(const void* ptr)
 {
-    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
-    return ((const xxh_unalign*)ptr)->u32;
+    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+    return *((const xxh_unalign32*)ptr);
 }
 
 #else
@@ -1731,6 +2666,51 @@ static int XXH_isLittleEndian(void)
 #  define XXH_HAS_BUILTIN(x) 0
 #endif
 
+
+
+/*
+ * C23 and future versions have standard "unreachable()".
+ * Once it has been implemented reliably we can add it as an
+ * additional case:
+ *
+ * ```
+ * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
+ * #  include <stddef.h>
+ * #  ifdef unreachable
+ * #    define XXH_UNREACHABLE() unreachable()
+ * #  endif
+ * #endif
+ * ```
+ *
+ * Note C++23 also has std::unreachable() which can be detected
+ * as follows:
+ * ```
+ * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
+ * #  include <utility>
+ * #  define XXH_UNREACHABLE() std::unreachable()
+ * #endif
+ * ```
+ * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
+ * We don't use that as including `<utility>` in `extern "C"` blocks
+ * doesn't work on GCC12
+ */
+
+#if XXH_HAS_BUILTIN(__builtin_unreachable)
+#  define XXH_UNREACHABLE() __builtin_unreachable()
+
+#elif defined(_MSC_VER)
+#  define XXH_UNREACHABLE() __assume(0)
+
+#else
+#  define XXH_UNREACHABLE()
+#endif
+
+#if XXH_HAS_BUILTIN(__builtin_assume)
+#  define XXH_ASSUME(c) __builtin_assume(c)
+#else
+#  define XXH_ASSUME(c) do { if (!(c)) { XXH_UNREACHABLE(); } } while (0)  /* do/while keeps the macro a single statement, safe before `else` */
+#endif
+
 /*!
  * @internal
  * @def XXH_rotl32(x,r)
@@ -1853,8 +2833,10 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
 *********************************************************************/
 /*!
  * @}
- * @defgroup xxh32_impl XXH32 implementation
+ * @defgroup XXH32_impl XXH32 implementation
  * @ingroup impl
+ *
+ * Details on the XXH32 implementation.
  * @{
  */
  /* #define instead of static const, to be used as initializers */
@@ -1888,7 +2870,7 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
     acc += input * XXH_PRIME32_2;
     acc  = XXH_rotl32(acc, 13);
     acc *= XXH_PRIME32_1;
-#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
     /*
      * UGLY HACK:
      * A compiler fence is the only thing that prevents GCC and Clang from
@@ -1918,9 +2900,12 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
      *   can load data, while v3 can multiply. SSE forces them to operate
      *   together.
      *
-     * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
-     * and it is pointless writing a NEON implementation that is basically the
-     * same speed as scalar for XXH32.
+     * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
+     * the loop. NEON is only faster on the A53, and with the newer cores, it is less
+     * than half the speed.
+     *
+     * Additionally, this is used on WASM SIMD128 because it JITs to the same
+     * SIMD instructions and has the same issue.
      */
     XXH_COMPILER_GUARD(acc);
 #endif
@@ -1934,17 +2919,17 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
  * The final mix ensures that all input bits have a chance to impact any bit in
  * the output digest, resulting in an unbiased distribution.
  *
- * @param h32 The hash to avalanche.
+ * @param hash The hash to avalanche.
  * @return The avalanched hash.
  */
-static xxh_u32 XXH32_avalanche(xxh_u32 h32)
+static xxh_u32 XXH32_avalanche(xxh_u32 hash)
 {
-    h32 ^= h32 >> 15;
-    h32 *= XXH_PRIME32_2;
-    h32 ^= h32 >> 13;
-    h32 *= XXH_PRIME32_3;
-    h32 ^= h32 >> 16;
-    return(h32);
+    hash ^= hash >> 15;
+    hash *= XXH_PRIME32_2;
+    hash ^= hash >> 13;
+    hash *= XXH_PRIME32_3;
+    hash ^= hash >> 16;
+    return hash;
 }
 
 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
@@ -1957,24 +2942,25 @@ static xxh_u32 XXH32_avalanche(xxh_u32 h32)
  * This final stage will digest them to ensure that all input bytes are present
  * in the final mix.
  *
- * @param h32 The hash to finalize.
+ * @param hash The hash to finalize.
  * @param ptr The pointer to the remaining input.
  * @param len The remaining length, modulo 16.
  * @param align Whether @p ptr is aligned.
  * @return The finalized hash.
+ * @see XXH64_finalize().
  */
-static xxh_u32
-XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
+static XXH_PUREF xxh_u32
+XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
 {
-#define XXH_PROCESS1 do {                           \
-    h32 += (*ptr++) * XXH_PRIME32_5;                \
-    h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;      \
+#define XXH_PROCESS1 do {                             \
+    hash += (*ptr++) * XXH_PRIME32_5;                 \
+    hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1;      \
 } while (0)
 
-#define XXH_PROCESS4 do {                           \
-    h32 += XXH_get32bits(ptr) * XXH_PRIME32_3;      \
-    ptr += 4;                                   \
-    h32  = XXH_rotl32(h32, 17) * XXH_PRIME32_4;     \
+#define XXH_PROCESS4 do {                             \
+    hash += XXH_get32bits(ptr) * XXH_PRIME32_3;       \
+    ptr += 4;                                         \
+    hash  = XXH_rotl32(hash, 17) * XXH_PRIME32_4;     \
 } while (0)
 
     if (ptr==NULL) XXH_ASSERT(len == 0);
@@ -1990,49 +2976,49 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
             XXH_PROCESS1;
             --len;
         }
-        return XXH32_avalanche(h32);
+        return XXH32_avalanche(hash);
     } else {
          switch(len&15) /* or switch(bEnd - p) */ {
            case 12:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 8:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 4:       XXH_PROCESS4;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 13:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 9:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 5:       XXH_PROCESS4;
                          XXH_PROCESS1;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 14:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 10:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 6:       XXH_PROCESS4;
                          XXH_PROCESS1;
                          XXH_PROCESS1;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 15:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 11:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 7:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 3:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 2:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 1:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
-           case 0:       return XXH32_avalanche(h32);
+                         XXH_FALLTHROUGH;  /* fallthrough */
+           case 0:       return XXH32_avalanche(hash);
         }
         XXH_ASSERT(0);
-        return h32;   /* reaching this point is deemed impossible */
+        return hash;   /* reaching this point is deemed impossible */
     }
 }
 
@@ -2052,7 +3038,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
  * @param align Whether @p input is aligned.
  * @return The calculated hash.
  */
-XXH_FORCE_INLINE xxh_u32
+XXH_FORCE_INLINE XXH_PUREF xxh_u32
 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
 {
     xxh_u32 h32;
@@ -2085,10 +3071,10 @@ XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment
     return XXH32_finalize(h32, input, len&15, align);
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
 {
-#if 0
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
     XXH32_state_t state;
     XXH32_reset(&state, seed);
@@ -2107,27 +3093,26 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t s
 
 
 /*******   Hash streaming   *******/
-/*!
- * @ingroup xxh32_family
- */
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
 {
     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
 }
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
 {
     XXH_free(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
 {
     XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
 {
     XXH_ASSERT(statePtr != NULL);
@@ -2140,7 +3125,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t s
 }
 
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode
 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 {
@@ -2195,7 +3180,7 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 }
 
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
 {
     xxh_u32 h32;
@@ -2213,31 +3198,18 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
 
     return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 /*******   Canonical representation   *******/
 
-/*!
- * @ingroup xxh32_family
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- *
- * The canonical representation uses big endian convention, the same convention
- * as human-readable numbers (large digits first).
- *
- * This way, hash values can be written into a file or buffer, remaining
- * comparable across different systems.
- *
- * The following functions allow transformation of hash values to and from their
- * canonical format.
- */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
 {
-    /* XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); */
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
     XXH_memcpy(dst, &hash, sizeof(*dst));
 }
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
 {
     return XXH_readBE32(src);
@@ -2278,18 +3250,19 @@ static xxh_u64 XXH_read64(const void* memPtr)
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
- * __pack instructions are safer, but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
  */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
 #endif
 static xxh_u64 XXH_read64(const void* ptr)
 {
-    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
-    return ((const xxh_unalign64*)ptr)->u64;
+    typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
+    return *((const xxh_unalign64*)ptr);
 }
 
 #else
@@ -2380,8 +3353,10 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
 /*******   xxh64   *******/
 /*!
  * @}
- * @defgroup xxh64_impl XXH64 implementation
+ * @defgroup XXH64_impl XXH64 implementation
  * @ingroup impl
+ *
+ * Details on the XXH64 implementation.
  * @{
  */
 /* #define rather that static const, to be used as initializers */
@@ -2399,6 +3374,7 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
 #  define PRIME64_5 XXH_PRIME64_5
 #endif
 
+/*! @copydoc XXH32_round */
 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
 {
     acc += input * XXH_PRIME64_2;
@@ -2415,43 +3391,59 @@ static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
     return acc;
 }
 
-static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+/*! @copydoc XXH32_avalanche */
+static xxh_u64 XXH64_avalanche(xxh_u64 hash)
 {
-    h64 ^= h64 >> 33;
-    h64 *= XXH_PRIME64_2;
-    h64 ^= h64 >> 29;
-    h64 *= XXH_PRIME64_3;
-    h64 ^= h64 >> 32;
-    return h64;
+    hash ^= hash >> 33;
+    hash *= XXH_PRIME64_2;
+    hash ^= hash >> 29;
+    hash *= XXH_PRIME64_3;
+    hash ^= hash >> 32;
+    return hash;
 }
 
 
 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
 
-static xxh_u64
-XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+/*!
+ * @internal
+ * @brief Processes the last 0-31 bytes of @p ptr.
+ *
+ * There may be up to 31 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 32.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ * @see XXH32_finalize().
+ */
+static XXH_PUREF xxh_u64
+XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
 {
     if (ptr==NULL) XXH_ASSERT(len == 0);
     len &= 31;
     while (len >= 8) {
         xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
         ptr += 8;
-        h64 ^= k1;
-        h64  = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+        hash ^= k1;
+        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
         len -= 8;
     }
     if (len >= 4) {
-        h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
         ptr += 4;
-        h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
         len -= 4;
     }
     while (len > 0) {
-        h64 ^= (*ptr++) * XXH_PRIME64_5;
-        h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;
+        hash ^= (*ptr++) * XXH_PRIME64_5;
+        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
         --len;
     }
-    return  XXH64_avalanche(h64);
+    return  XXH64_avalanche(hash);
 }
 
 #ifdef XXH_OLD_NAMES
@@ -2464,7 +3456,15 @@ XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
 #  undef XXH_PROCESS8_64
 #endif
 
-XXH_FORCE_INLINE xxh_u64
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH64().
+ *
+ * @param input , len , seed Directly passed from @ref XXH64().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u64
 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
 {
     xxh_u64 h64;
@@ -2501,10 +3501,10 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
 }
 
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
-#if 0
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
     XXH64_state_t state;
     XXH64_reset(&state, seed);
@@ -2522,27 +3522,27 @@ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t s
 }
 
 /*******   Hash Streaming   *******/
-
-/*! @ingroup xxh64_family*/
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH64_family*/
 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
 {
     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
 }
-/*! @ingroup xxh64_family */
+/*! @ingroup XXH64_family */
 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
 {
     XXH_free(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
 {
     XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
 {
     XXH_ASSERT(statePtr != NULL);
     memset(statePtr, 0, sizeof(*statePtr));
@@ -2553,9 +3553,9 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t s
     return XXH_OK;
 }
 
-/*! @ingroup xxh64_family */
+/*! @ingroup XXH64_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH64_update (XXH64_state_t* state, const void* input, size_t len)
+XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
     if (input==NULL) {
         XXH_ASSERT(len == 0);
@@ -2605,8 +3605,8 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
 }
 
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
 {
     xxh_u64 h64;
 
@@ -2624,20 +3624,20 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
 
     return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 /******* Canonical representation   *******/
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
 {
-    /* XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); */
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
     XXH_memcpy(dst, &hash, sizeof(*dst));
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
 {
     return XXH_readBE64(src);
 }
@@ -2650,7 +3650,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 ************************************************************************ */
 /*!
  * @}
- * @defgroup xxh3_impl XXH3 implementation
+ * @defgroup XXH3_impl XXH3 implementation
  * @ingroup impl
  * @{
  */
@@ -2658,11 +3658,19 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 /* ===   Compiler specifics   === */
 
 #if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
-#  define XXH_RESTRICT /* disable */
+#  define XXH_RESTRICT   /* disable */
 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
 #  define XXH_RESTRICT   restrict
+#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
+   || (defined (__clang__)) \
+   || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
+   || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
+/*
+ * There are a LOT more compilers that recognize __restrict but this
+ * covers the major ones.
+ */
+#  define XXH_RESTRICT   __restrict
 #else
-/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
 #  define XXH_RESTRICT   /* disable */
 #endif
 
@@ -2676,10 +3684,26 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #    define XXH_unlikely(x) (x)
 #endif
 
+#ifndef XXH_HAS_INCLUDE
+#  ifdef __has_include
+/*
+ * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
+ * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
+ */
+#    define XXH_HAS_INCLUDE __has_include
+#  else
+#    define XXH_HAS_INCLUDE(x) 0
+#  endif
+#endif
+
 #if defined(__GNUC__) || defined(__clang__)
+#  if defined(__ARM_FEATURE_SVE)
+#    include <arm_sve.h>
+#  endif
 #  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
-   || defined(__aarch64__)  || defined(_M_ARM) \
-   || defined(_M_ARM64)     || defined(_M_ARM64EC)
+   || (defined(_M_ARM) && _M_ARM >= 7) \
+   || defined(_M_ARM64) || defined(_M_ARM64EC) \
+   || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
 #    define inline __inline__  /* circumvent a clang bug */
 #    include <arm_neon.h>
 #    undef inline
@@ -2790,7 +3814,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
  * Note that these are actually implemented as macros.
  *
  * If this is not defined, it is detected automatically.
- * @ref XXH_X86DISPATCH overrides this.
+ * The internal macro XXH_X86DISPATCH overrides this.
  */
 enum XXH_VECTOR_TYPE /* fake enum */ {
     XXH_SCALAR = 0,  /*!< Portable scalar version */
@@ -2802,8 +3826,13 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
                       */
     XXH_AVX2   = 2,  /*!< AVX2 for Haswell and Bulldozer */
     XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
-    XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
+    XXH_NEON   = 4,  /*!<
+                       * NEON for most ARMv7-A, all AArch64, and WASM SIMD128
+                       * via the SIMDeverywhere polyfill provided with the
+                       * Emscripten SDK.
+                       */
     XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+    XXH_SVE    = 6,  /*!< SVE for some ARMv8-A and ARMv9-A */
 };
 /*!
  * @ingroup tuning
@@ -2825,12 +3854,16 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #  define XXH_AVX512 3
 #  define XXH_NEON   4
 #  define XXH_VSX    5
+#  define XXH_SVE    6
 #endif
 
 #ifndef XXH_VECTOR    /* can be defined on command line */
-#  if ( \
+#  if defined(__ARM_FEATURE_SVE)
+#    define XXH_VECTOR XXH_SVE
+#  elif ( \
         defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
      || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
+     || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
    ) && ( \
         defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
@@ -2851,6 +3884,17 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #  endif
 #endif
 
+/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
+#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
+#  ifdef _MSC_VER
+#    pragma warning(once : 4606)
+#  else
+#    warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
+#  endif
+#  undef XXH_VECTOR
+#  define XXH_VECTOR XXH_SCALAR
+#endif
+
 /*
  * Controls the alignment of the accumulator,
  * for compatibility with aligned vector loads, which are usually faster.
@@ -2870,16 +3914,26 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #     define XXH_ACC_ALIGN 16
 #  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
 #     define XXH_ACC_ALIGN 64
+#  elif XXH_VECTOR == XXH_SVE   /* sve */
+#     define XXH_ACC_ALIGN 64
 #  endif
 #endif
 
 #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
     || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
 #  define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#elif XXH_VECTOR == XXH_SVE
+#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
 #else
 #  define XXH_SEC_ALIGN 8
 #endif
 
+#if defined(__GNUC__) || defined(__clang__)
+#  define XXH_ALIASING __attribute__((may_alias))
+#else
+#  define XXH_ALIASING /* nothing */
+#endif
+
 /*
  * UGLY HACK:
  * GCC usually generates the best code with -O3 for xxHash.
@@ -2894,162 +3948,135 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
  * That is why when compiling the AVX2 version, it is recommended to use either
  *   -O2 -mavx2 -march=haswell
  * or
- *   -O2 -mavx2 -mno-avx256-split-unaligned-load
- * for decent performance, or to use Clang instead.
- *
- * Fortunately, we can control the first one with a pragma that forces GCC into
- * -O2, but the other one we can't control without "failed to inline always
- * inline function due to target mismatch" warnings.
- */
-#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
-  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
-#  pragma GCC push_options
-#  pragma GCC optimize("-O2")
-#endif
-
-
-#if XXH_VECTOR == XXH_NEON
-/*
- * NEON's setup for vmlal_u32 is a little more complicated than it is on
- * SSE2, AVX2, and VSX.
- *
- * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
- *
- * To do the same operation, the 128-bit 'Q' register needs to be split into
- * two 64-bit 'D' registers, performing this operation::
- *
- *   [                a                 |                 b                ]
- *            |              '---------. .--------'                |
- *            |                         x                          |
- *            |              .---------' '--------.                |
- *   [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[    a >> 32     |     b >> 32    ]
- *
- * Due to significant changes in aarch64, the fastest method for aarch64 is
- * completely different than the fastest method for ARMv7-A.
- *
- * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
- * D11 will modify the high half of Q5. This is similar to how modifying AH
- * will only affect bits 8-15 of AX on x86.
- *
- * VZIP takes two registers, and puts even lanes in one register and odd lanes
- * in the other.
- *
- * On ARMv7-A, this strangely modifies both parameters in place instead of
- * taking the usual 3-operand form.
- *
- * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
- * lower and upper halves of the Q register to end up with the high and low
- * halves where we want - all in one instruction.
- *
- *   vzip.32   d10, d11       @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
- *
- * Unfortunately we need inline assembly for this: Instructions modifying two
- * registers at once is not possible in GCC or Clang's IR, and they have to
- * create a copy.
- *
- * aarch64 requires a different approach.
- *
- * In order to make it easier to write a decent compiler for aarch64, many
- * quirks were removed, such as conditional execution.
- *
- * NEON was also affected by this.
- *
- * aarch64 cannot access the high bits of a Q-form register, and writes to a
- * D-form register zero the high bits, similar to how writes to W-form scalar
- * registers (or DWORD registers on x86_64) work.
- *
- * The formerly free vget_high intrinsics now require a vext (with a few
- * exceptions)
- *
- * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
- * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
- * operand.
+ *   -O2 -mavx2 -mno-avx256-split-unaligned-load
+ * for decent performance, or to use Clang instead.
  *
- * The equivalent of the VZIP.32 on the lower and upper halves would be this
- * mess:
+ * Fortunately, we can control the first one with a pragma that forces GCC into
+ * -O2, but the other one we can't control without "failed to inline always
+ * inline function due to target mismatch" warnings.
+ */
+#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
+#  pragma GCC push_options
+#  pragma GCC optimize("-O2")
+#endif
+
+#if XXH_VECTOR == XXH_NEON
+
+/*
+ * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
+ * optimizes out the entire hashLong loop because of the aliasing violation.
  *
- *   ext     v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
- *   zip1    v1.2s, v0.2s, v2.2s     // v1 = { v0[0], v2[0] }
- *   zip2    v0.2s, v0.2s, v1.2s     // v0 = { v0[1], v2[1] }
+ * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
+ * so the only option is to mark it as aliasing.
+ */
+typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
+
+/*!
+ * @internal
+ * @brief `vld1q_u64` but faster and alignment-safe.
  *
- * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
+ * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
+ * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
  *
- *   shrn    v1.2s, v0.2d, #32  // v1 = (uint32x2_t)(v0 >> 32);
- *   xtn     v0.2s, v0.2d       // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
+ * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
+ * prohibits load-store optimizations. Therefore, a direct dereference is used.
  *
- * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
+ * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
+ * unaligned load.
  */
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
+{
+    return *(xxh_aliasing_uint64x2_t const *)ptr;
+}
+#else
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
+{
+    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
+}
+#endif
 
 /*!
- * Function-like macro:
- * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
- * {
- *     outLo = (uint32x2_t)(in & 0xFFFFFFFF);
- *     outHi = (uint32x2_t)(in >> 32);
- *     in = UNDEFINED;
- * }
+ * @internal
+ * @brief `vmlal_u32` on low and high halves of a vector.
+ *
+ * This is a workaround for AArch64 GCC < 11, which implemented arm_neon.h with
+ * inline assembly and was therefore incapable of merging the `vget_{low, high}_u32`
+ * with `vmlal_u32`.
  */
-# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
-   && (defined(__GNUC__) || defined(__clang__)) \
-   && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
-#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
-    do {                                                                                    \
-      /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
-      /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
-      /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
-      __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
-      (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
-      (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
-   } while (0)
-# else
-#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
-    do {                                                                                  \
-      (outLo) = vmovn_u64    (in);                                                        \
-      (outHi) = vshrn_n_u64  ((in), 32);                                                  \
-    } while (0)
-# endif
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    /* Inline assembly is the only way */
+    __asm__("umlal   %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
+    return acc;
+}
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    /* This intrinsic works as expected */
+    return vmlal_high_u32(acc, lhs, rhs);
+}
+#else
+/* Portable intrinsic versions */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
+}
+/*! @copydoc XXH_vmlal_low_u32
+ * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
+}
+#endif
 
 /*!
  * @ingroup tuning
  * @brief Controls the NEON to scalar ratio for XXH3
  *
- * On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
- * 2 lanes on scalar by default.
+ * This can be set to 2, 4, 6, or 8.
  *
- * This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
- * emulated 64-bit arithmetic is too slow.
+ * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
  *
- * Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
+ * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
+ * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
+ * bandwidth.
  *
- * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
- * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
- * you are only using 2/3 of the CPU bandwidth.
- *
- * This is even more noticeable on the more advanced cores like the A76 which
+ * This is even more noticeable on the more advanced cores like the Cortex-A76 which
  * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
  *
- * Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
- * remaining lanes will use scalar instructions. This improves the bandwidth
- * and also gives the integer pipelines something to do besides twiddling loop
- * counters and pointers.
+ * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
+ * and 2 scalar lanes, which is chosen by default.
+ *
+ * This does not apply to Apple processors or 32-bit processors, which run better with
+ * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
  *
  * This change benefits CPUs with large micro-op buffers without negatively affecting
- * other CPUs:
+ * most other CPUs:
  *
  *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
  *  |:----------------------|:--------------------|----------:|-----------:|------:|
  *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
  *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
  *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
+ *  | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s |  36.1 GB/s |  ~-3% |
  *
  * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
  *
+ * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes, meaning
+ * it effectively becomes a worse version of 4.
+ *
  * @see XXH3_accumulate_512_neon()
  */
 # ifndef XXH3_NEON_LANES
 #  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
-   && !defined(__OPTIMIZE_SIZE__)
+   && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
 #   define XXH3_NEON_LANES 6
 #  else
 #   define XXH3_NEON_LANES XXH_ACC_NB
@@ -3066,27 +4093,42 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
  * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
  */
 #if XXH_VECTOR == XXH_VSX
+/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
+ * and `pixel`. This is a problem for obvious reasons.
+ *
+ * These keywords are unnecessary; the spec literally says they are
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
+ * after including the header.
+ *
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
+#  pragma push_macro("bool")
+#  pragma push_macro("vector")
+#  pragma push_macro("pixel")
+/* silence potential macro redefined warnings */
+#  undef bool
+#  undef vector
+#  undef pixel
+
 #  if defined(__s390x__)
 #    include <s390x/vecintrin.h>
 #  else
-/* gcc's altivec.h can have the unwanted consequence to unconditionally
- * #define bool, vector, and pixel keywords,
- * with bad consequences for programs already using these keywords for other purposes.
- * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
- * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
- * but it seems that, in some cases, it isn't.
- * Force the build macro to be defined, so that keywords are not altered.
- */
-#    if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
-#      define __APPLE_ALTIVEC__
-#    endif
 #    include <altivec.h>
 #  endif
 
+/* Restore the original macro values, if applicable. */
+#  pragma pop_macro("pixel")
+#  pragma pop_macro("vector")
+#  pragma pop_macro("bool")
+
 typedef __vector unsigned long long xxh_u64x2;
 typedef __vector unsigned char xxh_u8x16;
 typedef __vector unsigned xxh_u32x4;
 
+/*
+ * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
+ */
+typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
+
 # ifndef XXH_VSX_BE
 #  if defined(__BIG_ENDIAN__) \
   || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
@@ -3138,8 +4180,9 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
  /* s390x is always big endian, no issue on this platform */
 #  define XXH_vec_mulo vec_mulo
 #  define XXH_vec_mule vec_mule
-# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
 /* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
+ /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
 #  define XXH_vec_mulo __builtin_altivec_vmulouw
 #  define XXH_vec_mule __builtin_altivec_vmuleuw
 # else
@@ -3160,13 +4203,28 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
 # endif /* XXH_vec_mulo, XXH_vec_mule */
 #endif /* XXH_VECTOR == XXH_VSX */
 
+#if XXH_VECTOR == XXH_SVE
+#define ACCRND(acc, offset) \
+do { \
+    svuint64_t input_vec = svld1_u64(mask, xinput + offset);         \
+    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset);       \
+    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec);     \
+    svuint64_t swapped = svtbl_u64(input_vec, kSwap);                \
+    svuint64_t mixed_lo = svextw_u64_x(mask, mixed);                 \
+    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32);            \
+    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
+    acc = svadd_u64_x(mask, acc, mul);                               \
+} while (0)
+#endif /* XXH_VECTOR == XXH_SVE */
 
 /* prefetch
  * can be disabled, by declaring XXH_NO_PREFETCH build macro */
 #if defined(XXH_NO_PREFETCH)
 #  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
 #else
-#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) && !defined(_M_ARM64EC)  /* _mm_prefetch() not defined outside of x86/x64 */
+#  if XXH_SIZE_OPT >= 1
+#    define XXH_PREFETCH(ptr) (void)(ptr)
+#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
 #    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
 #    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
 #  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
@@ -3203,6 +4261,8 @@ XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
     0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
 };
 
+static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;  /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */
+static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;  /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */
 
 #ifdef XXH_OLD_NAMES
 #  define kSecret XXH3_kSecret
@@ -3394,7 +4454,7 @@ XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
 }
 
 /*! Seems to produce slightly better code on GCC for some reason. */
-XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
+XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
 {
     XXH_ASSERT(0 <= shift && shift < 64);
     return v64 ^ (v64 >> shift);
@@ -3407,7 +4467,7 @@ XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
 static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
 {
     h64 = XXH_xorshift64(h64, 37);
-    h64 *= 0x165667919E3779F9ULL;
+    h64 *= PRIME_MX1;
     h64 = XXH_xorshift64(h64, 32);
     return h64;
 }
@@ -3421,9 +4481,9 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
 {
     /* this mix is inspired by Pelle Evensen's rrmxmx */
     h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
-    h64 *= 0x9FB21C651E98DF25ULL;
+    h64 *= PRIME_MX2;
     h64 ^= (h64 >> 35) + len ;
-    h64 *= 0x9FB21C651E98DF25ULL;
+    h64 *= PRIME_MX2;
     return XXH_xorshift64(h64, 28);
 }
 
@@ -3461,7 +4521,7 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
  *
  * This adds an extra layer of strength for custom secrets.
  */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3483,7 +4543,7 @@ XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3499,7 +4559,7 @@ XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3516,7 +4576,7 @@ XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(len <= 16);
@@ -3586,7 +4646,7 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
 }
 
 /* For mid range keys, XXH3 uses a Mum-hash variant. */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                      XXH64_hash_t seed)
@@ -3595,6 +4655,14 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
     XXH_ASSERT(16 < len && len <= 128);
 
     {   xxh_u64 acc = len * XXH_PRIME64_1;
+#if XXH_SIZE_OPT >= 1
+        /* Smaller and cleaner, but slightly slower. */
+        unsigned int i = (unsigned int)(len - 1) / 32;
+        do {
+            acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
+            acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
+        } while (i-- != 0);
+#else
         if (len > 32) {
             if (len > 64) {
                 if (len > 96) {
@@ -3609,14 +4677,17 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
         }
         acc += XXH3_mix16B(input+0, secret+0, seed);
         acc += XXH3_mix16B(input+len-16, secret+16, seed);
-
+#endif
         return XXH3_avalanche(acc);
     }
 }
 
+/*!
+ * @brief Maximum size of "short" key in bytes.
+ */
 #define XXH3_MIDSIZE_MAX 240
 
-XXH_NO_INLINE XXH64_hash_t
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
@@ -3628,13 +4699,17 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
     #define XXH3_MIDSIZE_LASTOFFSET  17
 
     {   xxh_u64 acc = len * XXH_PRIME64_1;
-        int const nbRounds = (int)len / 16;
-        int i;
+        xxh_u64 acc_end;
+        unsigned int const nbRounds = (unsigned int)len / 16;
+        unsigned int i;
+        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
         for (i=0; i<8; i++) {
             acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
         }
-        acc = XXH3_avalanche(acc);
+        /* last bytes */
+        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
         XXH_ASSERT(nbRounds >= 8);
+        acc = XXH3_avalanche(acc);
 #if defined(__clang__)                                /* Clang */ \
     && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
     && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
@@ -3661,11 +4736,13 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
         #pragma clang loop vectorize(disable)
 #endif
         for (i=8 ; i < nbRounds; i++) {
-            acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+            /*
+             * Prevents clang from unrolling the acc loop and interleaving with this one.
+             */
+            XXH_COMPILER_GUARD(acc);
+            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
         }
-        /* last bytes */
-        acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
-        return XXH3_avalanche(acc);
+        return XXH3_avalanche(acc + acc_end);
     }
 }
 
@@ -3681,6 +4758,47 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
 #  define ACC_NB XXH_ACC_NB
 #endif
 
+#ifndef XXH_PREFETCH_DIST
+#  ifdef __clang__
+#    define XXH_PREFETCH_DIST 320
+#  else
+#    if (XXH_VECTOR == XXH_AVX512)
+#      define XXH_PREFETCH_DIST 512
+#    else
+#      define XXH_PREFETCH_DIST 384
+#    endif
+#  endif  /* __clang__ */
+#endif  /* XXH_PREFETCH_DIST */
+
+/*
+ * These macros are to generate an XXH3_accumulate() function.
+ * The argument selects the name suffix; attributes precede the invocation.
+ *
+ * The name of this symbol is XXH3_accumulate_<name>() and it calls
+ * XXH3_accumulate_512_<name>().
+ *
+ * It may be useful to hand implement this function if the compiler fails to
+ * optimize the inline function.
+ */
+#define XXH3_ACCUMULATE_TEMPLATE(name)                      \
+void                                                        \
+XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
+                       const xxh_u8* XXH_RESTRICT input,    \
+                       const xxh_u8* XXH_RESTRICT secret,   \
+                       size_t nbStripes)                    \
+{                                                           \
+    size_t n;                                               \
+    for (n = 0; n < nbStripes; n++ ) {                      \
+        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
+        XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
+        XXH3_accumulate_512_##name(                         \
+                 acc,                                       \
+                 in,                                        \
+                 secret + n*XXH_SECRET_CONSUME_RATE);       \
+    }                                                       \
+}
+
+
 XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
 {
     if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
@@ -3749,7 +4867,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
         /* data_key    = data_vec ^ key_vec; */
         __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
         /* data_key_lo = data_key >> 32; */
-        __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+        __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
         /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
         __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
         /* xacc[0] += swap(data_vec); */
@@ -3759,6 +4877,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
         *xacc = _mm512_add_epi64(product, sum);
     }
 }
+XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
 
 /*
  * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
@@ -3792,13 +4911,12 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         /* xacc[0] ^= (xacc[0] >> 47) */
         __m512i const acc_vec     = *xacc;
         __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
-        __m512i const data_vec    = _mm512_xor_si512     (acc_vec, shifted);
         /* xacc[0] ^= secret; */
         __m512i const key_vec     = _mm512_loadu_si512   (secret);
-        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
+        __m512i const data_key    = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
 
         /* xacc[0] *= XXH_PRIME32_1; */
-        __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+        __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
         __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
         __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
         *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
@@ -3813,7 +4931,8 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
     XXH_ASSERT(((size_t)customSecret & 63) == 0);
     (void)(&XXH_writeLE64);
     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
-        __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
+        __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
+        __m512i const seed     = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
 
         const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
               __m512i* const dest = (      __m512i*) customSecret;
@@ -3821,14 +4940,7 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
         XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
         XXH_ASSERT(((size_t)dest & 63) == 0);
         for (i=0; i < nbRounds; ++i) {
-            /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',
-             * this will warn "discards 'const' qualifier". */
-            union {
-                const __m512i* cp;
-                void* p;
-            } remote_const_void;
-            remote_const_void.cp = src + i;
-            dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
+            dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
     }   }
 }
 
@@ -3864,7 +4976,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
             /* data_key    = data_vec ^ key_vec; */
             __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
             /* data_key_lo = data_key >> 32; */
-            __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
             /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
             __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
             /* xacc[i] += swap(data_vec); */
@@ -3874,6 +4986,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
             xacc[i] = _mm256_add_epi64(product, sum);
     }   }
 }
+XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
 
 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
 XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
@@ -3896,7 +5009,7 @@ XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
             __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
 
             /* xacc[i] *= XXH_PRIME32_1; */
-            __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
             __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
             __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
             xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
@@ -3928,12 +5041,12 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTR
         XXH_ASSERT(((size_t)dest & 31) == 0);
 
         /* GCC -O2 need unroll loop manually */
-        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
-        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
-        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
-        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
-        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
-        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
+        dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
+        dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
+        dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
+        dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
+        dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
+        dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
     }
 }
 
@@ -3980,6 +5093,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
             xacc[i] = _mm_add_epi64(product, sum);
     }   }
 }
+XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
 
 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
 XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
@@ -4058,14 +5172,28 @@ XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
 
 /*!
  * @internal
- * @brief The bulk processing loop for NEON.
+ * @brief The bulk processing loop for NEON and WASM SIMD128.
  *
  * The NEON code path is actually partially scalar when running on AArch64. This
  * is to optimize the pipelining and can have up to 15% speedup depending on the
  * CPU, and it also mitigates some GCC codegen issues.
  *
  * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ *
+ * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
+ * integers instead of the other platforms which mask full 64-bit vectors,
+ * so the setup is more complicated than just shifting right.
+ *
+ * Additionally, there is an optimization for 4 lanes at once noted below.
+ *
+ * Since, as stated, the most optimal amount of lanes for Cortexes is 6,
+ * there needs to be *three* versions of the accumulate operation used
+ * for the remaining 2 lanes.
+ *
+ * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
+ * nearly perfectly.
  */
+
 XXH_FORCE_INLINE void
 XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
@@ -4073,101 +5201,182 @@ XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
     XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
-    {
-        uint64x2_t* const xacc = (uint64x2_t *) acc;
+    {   /* GCC for darwin arm64 does not like aliasing here */
+        xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
         /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
-        uint8_t const* const xinput = (const uint8_t *) input;
-        uint8_t const* const xsecret  = (const uint8_t *) secret;
+        uint8_t const* xinput = (const uint8_t *) input;
+        uint8_t const* xsecret  = (const uint8_t *) secret;
 
         size_t i;
-        /* NEON for the first few lanes (these loops are normally interleaved) */
-        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
+#ifdef __wasm_simd128__
+        /*
+         * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret
+         * is constant propagated, which results in it converting it to this
+         * inside the loop:
+         *
+         *    a = v128.load(XXH3_kSecret +  0 + $secret_offset, offset = 0)
+         *    b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)
+         *    ...
+         *
+         * This requires a full 32-bit address immediate (and therefore a 6 byte
+         * instruction) as well as an add for each offset.
+         *
+         * Putting an asm guard prevents it from folding (at the cost of losing
+         * the alignment hint), and uses the free offset in `v128.load` instead
+         * of adding secret_offset each time which overall reduces code size by
+         * about a kilobyte and improves performance.
+         */
+        XXH_COMPILER_GUARD(xsecret);
+#endif
+        /* Scalar lanes use the normal scalarRound routine */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarRound(acc, input, secret, i);
+        }
+        i = 0;
+        /* 4 NEON lanes at a time. */
+        for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
             /* data_vec = xinput[i]; */
-            uint8x16_t data_vec    = vld1q_u8(xinput  + (i * 16));
+            uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput  + (i * 16));
+            uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput  + ((i+1) * 16));
             /* key_vec  = xsecret[i];  */
-            uint8x16_t key_vec     = vld1q_u8(xsecret + (i * 16));
-            uint64x2_t data_key;
-            uint32x2_t data_key_lo, data_key_hi;
-            /* xacc[i] += swap(data_vec); */
-            uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
-            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
-            xacc[i] = vaddq_u64 (xacc[i], swapped);
+            uint64x2_t key_vec_1  = XXH_vld1q_u64(xsecret + (i * 16));
+            uint64x2_t key_vec_2  = XXH_vld1q_u64(xsecret + ((i+1) * 16));
+            /* data_swap = swap(data_vec) */
+            uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
+            uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
             /* data_key = data_vec ^ key_vec; */
-            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
-            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
-             * data_key_hi = (uint32x2_t) (data_key >> 32);
-             * data_key = UNDEFINED; */
-            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
-            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
+            uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
+            uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
 
+            /*
+             * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
+             * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
+             * get one vector with the low 32 bits of each lane, and one vector
+             * with the high 32 bits of each lane.
+             *
+             * The intrinsic returns a double vector because the original ARMv7-a
+             * instruction modified both arguments in place. AArch64 and SIMD128 emit
+             * two instructions from this intrinsic.
+             *
+             *  [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
+             *  [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
+             */
+            uint32x4x2_t unzipped = vuzpq_u32(
+                vreinterpretq_u32_u64(data_key_1),
+                vreinterpretq_u32_u64(data_key_2)
+            );
+            /* data_key_lo = data_key & 0xFFFFFFFF */
+            uint32x4_t data_key_lo = unzipped.val[0];
+            /* data_key_hi = data_key >> 32 */
+            uint32x4_t data_key_hi = unzipped.val[1];
+            /*
+             * Then, we can split the vectors horizontally and multiply which, as for most
+             * widening intrinsics, have a variant that works on both high half vectors
+             * for free on AArch64. A similar instruction is available on SIMD128.
+             *
+             * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
+             */
+            uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
+            uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
+            /*
+             * Clang reorders
+             *    a += b * c;     // umlal   swap.2d, dkl.2s, dkh.2s
+             *    d += a;         // add     acc.2d, acc.2d, swap.2d
+             * to
+             *    d += a;         // add     acc.2d, acc.2d, swap.2d
+             *    d += b * c;     // umlal   acc.2d, dkl.2s, dkh.2s
+             *
+             * While it would make sense in theory since the addition is faster,
+             * for reasons likely related to umlal being limited to certain NEON
+             * pipelines, this is worse. A compiler guard fixes this.
+             */
+            XXH_COMPILER_GUARD_CLANG_NEON(sum_1);
+            XXH_COMPILER_GUARD_CLANG_NEON(sum_2);
+            /* xacc[i] = acc_vec + sum; */
+            xacc[i]   = vaddq_u64(xacc[i], sum_1);
+            xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
         }
-        /* Scalar for the remainder. This may be a zero iteration loop. */
-        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
-            XXH3_scalarRound(acc, input, secret, i);
+        /* Operate on the remaining NEON lanes 2 at a time. */
+        for (; i < XXH3_NEON_LANES / 2; i++) {
+            /* data_vec = xinput[i]; */
+            uint64x2_t data_vec = XXH_vld1q_u64(xinput  + (i * 16));
+            /* key_vec  = xsecret[i];  */
+            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
+            /* data_swap = swap(data_vec) */
+            uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
+            /* data_key = data_vec ^ key_vec; */
+            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
+            /* For two lanes, just use VMOVN and VSHRN. */
+            /* data_key_lo = data_key & 0xFFFFFFFF; */
+            uint32x2_t data_key_lo = vmovn_u64(data_key);
+            /* data_key_hi = data_key >> 32; */
+            uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
+            /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
+            uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
+            /* Same Clang workaround as before */
+            XXH_COMPILER_GUARD_CLANG_NEON(sum);
+            /* xacc[i] = acc_vec + sum; */
+            xacc[i] = vaddq_u64 (xacc[i], sum);
         }
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
 
 XXH_FORCE_INLINE void
 XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
 
-    {   uint64x2_t* xacc       = (uint64x2_t*) acc;
+    {   xxh_aliasing_uint64x2_t* xacc       = (xxh_aliasing_uint64x2_t*) acc;
         uint8_t const* xsecret = (uint8_t const*) secret;
-        uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);
 
         size_t i;
-        /* NEON for the first few lanes (these loops are normally interleaved) */
+        /* WASM uses operator overloads and doesn't need these. */
+#ifndef __wasm_simd128__
+        /* { prime32_1, prime32_1 } */
+        uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);
+        /* { 0, prime32_1, 0, prime32_1 } */
+        uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));
+#endif
+
+        /* AArch64 uses both scalar and neon at the same time */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarScrambleRound(acc, secret, i);
+        }
         for (i=0; i < XXH3_NEON_LANES / 2; i++) {
             /* xacc[i] ^= (xacc[i] >> 47); */
             uint64x2_t acc_vec  = xacc[i];
-            uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);
-            uint64x2_t data_vec = veorq_u64   (acc_vec, shifted);
+            uint64x2_t shifted  = vshrq_n_u64(acc_vec, 47);
+            uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
 
             /* xacc[i] ^= xsecret[i]; */
-            uint8x16_t key_vec  = vld1q_u8    (xsecret + (i * 16));
-            uint64x2_t data_key = veorq_u64   (data_vec, vreinterpretq_u64_u8(key_vec));
-
+            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
+            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
             /* xacc[i] *= XXH_PRIME32_1 */
-            uint32x2_t data_key_lo, data_key_hi;
-            /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF);
-             * data_key_hi = (uint32x2_t) (xacc[i] >> 32);
-             * xacc[i] = UNDEFINED; */
-            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            {   /*
-                 * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
-                 *
-                 * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
-                 * incorrectly "optimize" this:
-                 *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
-                 *   shifted = vshll_n_u32(tmp, 32);
-                 * to this:
-                 *   tmp     = "vmulq_u64"(a, b); // no such thing!
-                 *   shifted = vshlq_n_u64(tmp, 32);
-                 *
-                 * However, unlike SSE, Clang lacks a 64-bit multiply routine
-                 * for NEON, and it scalarizes two 64-bit multiplies instead.
-                 *
-                 * vmull_u32 has the same timing as vmul_u32, and it avoids
-                 * this bug completely.
-                 * See https://bugs.llvm.org/show_bug.cgi?id=39967
-                 */
-                uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
-                /* xacc[i] = prod_hi << 32; */
-                xacc[i] = vshlq_n_u64(prod_hi, 32);
-                /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
-                xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
-            }
-        }
-        /* Scalar for the remainder. This may be a zero iteration loop. */
-        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
-            XXH3_scalarScrambleRound(acc, secret, i);
+#ifdef __wasm_simd128__
+            /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */
+            xacc[i] = data_key * XXH_PRIME32_1;
+#else
+            /*
+             * Expanded version with portable NEON intrinsics
+             *
+             *    lo(x) * lo(y) + (hi(x) * lo(y) << 32)
+             *
+             * prod_hi = hi(data_key) * lo(prime) << 32
+             *
+             * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector
+             * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits
+             * and avoid the shift.
+             */
+            uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);
+            /* Extract low bits for vmlal_u32  */
+            uint32x2_t data_key_lo = vmovn_u64(data_key);
+            /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
+            xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
+#endif
         }
     }
 }
-
 #endif
 
 #if (XXH_VECTOR == XXH_VSX)
@@ -4178,23 +5387,23 @@ XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT secret)
 {
     /* presumed aligned */
-    unsigned int* const xacc = (unsigned int*) acc;
-    xxh_u64x2 const* const xinput   = (xxh_u64x2 const*) input;   /* no alignment restriction */
-    xxh_u64x2 const* const xsecret  = (xxh_u64x2 const*) secret;    /* no alignment restriction */
+    xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+    xxh_u8 const* const xinput   = (xxh_u8 const*) input;   /* no alignment restriction */
+    xxh_u8 const* const xsecret  = (xxh_u8 const*) secret;    /* no alignment restriction */
     xxh_u64x2 const v32 = { 32, 32 };
     size_t i;
     for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
         /* data_vec = xinput[i]; */
-        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
+        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
         /* key_vec = xsecret[i]; */
-        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
+        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
         xxh_u64x2 const data_key = data_vec ^ key_vec;
         /* shuffled = (data_key << 32) | (data_key >> 32); */
         xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
         /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
         xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
         /* acc_vec = xacc[i]; */
-        xxh_u64x2 acc_vec        = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
+        xxh_u64x2 acc_vec        = xacc[i];
         acc_vec += product;
 
         /* swap high and low halves */
@@ -4203,18 +5412,18 @@ XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
 #else
         acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
 #endif
-        /* xacc[i] = acc_vec; */
-        vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
+        xacc[i] = acc_vec;
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
 
 XXH_FORCE_INLINE void
 XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
 
-    {         xxh_u64x2* const xacc    =       (xxh_u64x2*) acc;
-        const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
+    {   xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+        const xxh_u8* const xsecret = (const xxh_u8*) secret;
         /* constants */
         xxh_u64x2 const v32  = { 32, 32 };
         xxh_u64x2 const v47 = { 47, 47 };
@@ -4226,7 +5435,7 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
             xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
 
             /* xacc[i] ^= xsecret[i]; */
-            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
+            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
             xxh_u64x2 const data_key = data_vec ^ key_vec;
 
             /* xacc[i] *= XXH_PRIME32_1 */
@@ -4240,8 +5449,148 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 
 #endif
 
+#if (XXH_VECTOR == XXH_SVE)
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
+                   const void* XXH_RESTRICT input,
+                   const void* XXH_RESTRICT secret)
+{
+    uint64_t *xacc = (uint64_t *)acc;
+    const uint64_t *xinput = (const uint64_t *)(const void *)input;
+    const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+    svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
+    uint64_t element_count = svcntd();
+    if (element_count >= 8) {
+        svbool_t mask = svptrue_pat_b64(SV_VL8);
+        svuint64_t vacc = svld1_u64(mask, xacc);
+        ACCRND(vacc, 0);
+        svst1_u64(mask, xacc, vacc);
+    } else if (element_count == 2) {   /* sve128 */
+        svbool_t mask = svptrue_pat_b64(SV_VL2);
+        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+        svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+        svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+        svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+        ACCRND(acc0, 0);
+        ACCRND(acc1, 2);
+        ACCRND(acc2, 4);
+        ACCRND(acc3, 6);
+        svst1_u64(mask, xacc + 0, acc0);
+        svst1_u64(mask, xacc + 2, acc1);
+        svst1_u64(mask, xacc + 4, acc2);
+        svst1_u64(mask, xacc + 6, acc3);
+    } else {
+        svbool_t mask = svptrue_pat_b64(SV_VL4);
+        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+        svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+        ACCRND(acc0, 0);
+        ACCRND(acc1, 4);
+        svst1_u64(mask, xacc + 0, acc0);
+        svst1_u64(mask, xacc + 4, acc1);
+    }
+}
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
+               const xxh_u8* XXH_RESTRICT input,
+               const xxh_u8* XXH_RESTRICT secret,
+               size_t nbStripes)
+{
+    if (nbStripes != 0) {
+        uint64_t *xacc = (uint64_t *)acc;
+        const uint64_t *xinput = (const uint64_t *)(const void *)input;
+        const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+        svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
+        uint64_t element_count = svcntd();
+        if (element_count >= 8) {
+            svbool_t mask = svptrue_pat_b64(SV_VL8);
+            svuint64_t vacc = svld1_u64(mask, xacc + 0);
+            do {
+                /* svprfd(svbool_t, void *, enum svfprop); */
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(vacc, 0);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, vacc);
+        } else if (element_count == 2) { /* sve128 */
+            svbool_t mask = svptrue_pat_b64(SV_VL2);
+            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+            svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+            svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+            svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+            do {
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(acc0, 0);
+                ACCRND(acc1, 2);
+                ACCRND(acc2, 4);
+                ACCRND(acc3, 6);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, acc0);
+           svst1_u64(mask, xacc + 2, acc1);
+           svst1_u64(mask, xacc + 4, acc2);
+           svst1_u64(mask, xacc + 6, acc3);
+        } else {
+            svbool_t mask = svptrue_pat_b64(SV_VL4);
+            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+            svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+            do {
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(acc0, 0);
+                ACCRND(acc1, 4);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, acc0);
+           svst1_u64(mask, xacc + 4, acc1);
+       }
+    }
+}
+
+#endif
+
 /* scalar variants - universal */
 
+#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
+/*
+ * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
+ * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
+ *
+ * While this might not seem like much, as AArch64 is a 64-bit architecture, only
+ * big Cortex designs have a full 64-bit multiplier.
+ *
+ * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
+ * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
+ * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
+ *
+ * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
+ * not have this penalty and does the mask automatically.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+    xxh_u64 ret;
+    /* note: %x = 64-bit register, %w = 32-bit register */
+    __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
+    return ret;
+}
+#else
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+    return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
+}
+#endif
+
 /*!
  * @internal
  * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
@@ -4264,7 +5613,7 @@ XXH3_scalarRound(void* XXH_RESTRICT acc,
         xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
         xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
         xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
-        xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+        xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
     }
 }
 
@@ -4278,10 +5627,18 @@ XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
                      const void* XXH_RESTRICT secret)
 {
     size_t i;
+    /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
+#if defined(__GNUC__) && !defined(__clang__) \
+  && (defined(__arm__) || defined(__thumb2__)) \
+  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
+  && XXH_SIZE_OPT <= 0
+#  pragma GCC unroll 8
+#endif
     for (i=0; i < XXH_ACC_NB; i++) {
         XXH3_scalarRound(acc, input, secret, i);
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
 
 /*!
  * @internal
@@ -4333,10 +5690,10 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
     const xxh_u8* kSecretPtr = XXH3_kSecret;
     XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
 
-#if defined(__clang__) && defined(__aarch64__)
+#if defined(__GNUC__) && defined(__aarch64__)
     /*
      * UGLY HACK:
-     * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
+     * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
      * placed sequentially, in order, at the top of the unrolled loop.
      *
      * While MOVK is great for generating constants (2 cycles for a 64-bit
@@ -4351,7 +5708,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      * ADD
      * SUB      STR
      *          STR
-     * By forcing loads from memory (as the asm line causes Clang to assume
+     * By forcing loads from memory (as the asm line causes the compiler to assume
      * that XXH3_kSecretPtr has been changed), the pipelines are used more
      * efficiently:
      *   I   L   S
@@ -4368,17 +5725,11 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      */
     XXH_COMPILER_GUARD(kSecretPtr);
 #endif
-    /*
-     * Note: in debug mode, this overrides the asm optimization
-     * and Clang will emit MOVK chains again.
-     */
-    XXH_ASSERT(kSecretPtr == XXH3_kSecret);
-
     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
         int i;
         for (i=0; i < nbRounds; i++) {
             /*
-             * The asm hack causes Clang to assume that kSecretPtr aliases with
+             * The asm hack causes the compiler to assume that kSecretPtr aliases with
              * customSecret, and on aarch64, this prevented LDP from merging two
              * loads together for free. Putting the loads together before the stores
              * properly generates LDP.
@@ -4391,7 +5742,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
 }
 
 
-typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
+typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
 typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
 typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
 
@@ -4399,82 +5750,63 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
 #if (XXH_VECTOR == XXH_AVX512)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
+#define XXH3_accumulate     XXH3_accumulate_avx512
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
 
 #elif (XXH_VECTOR == XXH_AVX2)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
+#define XXH3_accumulate     XXH3_accumulate_avx2
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
 
 #elif (XXH_VECTOR == XXH_SSE2)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
+#define XXH3_accumulate     XXH3_accumulate_sse2
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
 #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
 
 #elif (XXH_VECTOR == XXH_NEON)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_neon
+#define XXH3_accumulate     XXH3_accumulate_neon
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
 #elif (XXH_VECTOR == XXH_VSX)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
+#define XXH3_accumulate     XXH3_accumulate_vsx
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
+#elif (XXH_VECTOR == XXH_SVE)
+#define XXH3_accumulate_512 XXH3_accumulate_512_sve
+#define XXH3_accumulate     XXH3_accumulate_sve
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
 #else /* scalar */
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
+#define XXH3_accumulate     XXH3_accumulate_scalar
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
 #endif
 
-
-
-#ifndef XXH_PREFETCH_DIST
-#  ifdef __clang__
-#    define XXH_PREFETCH_DIST 320
-#  else
-#    if (XXH_VECTOR == XXH_AVX512)
-#      define XXH_PREFETCH_DIST 512
-#    else
-#      define XXH_PREFETCH_DIST 384
-#    endif
-#  endif  /* __clang__ */
-#endif  /* XXH_PREFETCH_DIST */
-
-/*
- * XXH3_accumulate()
- * Loops over XXH3_accumulate_512().
- * Assumption: nbStripes will not overflow the secret size
- */
-XXH_FORCE_INLINE void
-XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
-                const xxh_u8* XXH_RESTRICT input,
-                const xxh_u8* XXH_RESTRICT secret,
-                      size_t nbStripes,
-                      XXH3_f_accumulate_512 f_acc512)
-{
-    size_t n;
-    for (n = 0; n < nbStripes; n++ ) {
-        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
-        XXH_PREFETCH(in + XXH_PREFETCH_DIST);
-        f_acc512(acc,
-                 in,
-                 secret + n*XXH_SECRET_CONSUME_RATE);
-    }
-}
+#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
+#  undef XXH3_initCustomSecret
+#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+#endif
 
 XXH_FORCE_INLINE void
 XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
                       const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_accumulate f_acc,
                             XXH3_f_scrambleAcc f_scramble)
 {
     size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
@@ -4486,7 +5818,7 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
 
     for (n = 0; n < nb_blocks; n++) {
-        XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
+        f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
         f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
     }
 
@@ -4494,12 +5826,12 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
     XXH_ASSERT(len > XXH_STRIPE_LEN);
     {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
         XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
-        XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
+        f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
 
         /* last stripe */
         {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
 #define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
-            f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+            XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
     }   }
 }
 
@@ -4544,12 +5876,12 @@ XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secre
 XXH_FORCE_INLINE XXH64_hash_t
 XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
                            const void* XXH_RESTRICT secret, size_t secretSize,
-                           XXH3_f_accumulate_512 f_acc512,
+                           XXH3_f_accumulate f_acc,
                            XXH3_f_scrambleAcc f_scramble)
 {
     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
 
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
 
     /* converge into final hash */
     XXH_STATIC_ASSERT(sizeof(acc) == 64);
@@ -4563,13 +5895,15 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
  * It's important for performance to transmit secret's size (when it's static)
  * so that the compiler can properly optimize the vectorized loop.
  * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
  */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH3_WITH_SECRET_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
                              XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64;
-    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
@@ -4578,12 +5912,12 @@ XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
  * Note that inside this no_inline function, we do inline the internal loop,
  * and provide a statically defined secret size to allow optimization of vector loop.
  */
-XXH_NO_INLINE XXH64_hash_t
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
                           XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64; (void)secret; (void)secretLen;
-    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
@@ -4600,18 +5934,20 @@ XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
 XXH_FORCE_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
                                     XXH64_hash_t seed,
-                                    XXH3_f_accumulate_512 f_acc512,
+                                    XXH3_f_accumulate f_acc,
                                     XXH3_f_scrambleAcc f_scramble,
                                     XXH3_f_initCustomSecret f_initSec)
 {
+#if XXH_SIZE_OPT <= 0
     if (seed == 0)
         return XXH3_hashLong_64b_internal(input, len,
                                           XXH3_kSecret, sizeof(XXH3_kSecret),
-                                          f_acc512, f_scramble);
+                                          f_acc, f_scramble);
+#endif
     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         f_initSec(secret, seed);
         return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
-                                          f_acc512, f_scramble);
+                                          f_acc, f_scramble);
     }
 }
 
@@ -4619,12 +5955,12 @@ XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
  * It's important for performance that XXH3_hashLong is not inlined.
  */
 XXH_NO_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSeed(const void* input, size_t len,
-                           XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
+XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
+                           XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)secret; (void)secretLen;
     return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
-                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
 }
 
 
@@ -4656,37 +5992,37 @@ XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
 
 /* ===   Public entry point   === */
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
 {
-    return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
+    return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
-    return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
+    return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
 {
-    return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
+    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
 }
 
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
-    return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
+    if (length <= XXH3_MIDSIZE_MAX)
+        return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
 }
 
 
 /* ===   XXH3 streaming   === */
-
+#ifndef XXH_NO_STREAM
 /*
  * Malloc's a pointer that is always aligned to align.
  *
@@ -4710,7 +6046,7 @@ XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret,
  *
  * Align must be a power of 2 and 8 <= align <= 128.
  */
-static void* XXH_alignedMalloc(size_t s, size_t align)
+static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
 {
     XXH_ASSERT(align <= 128 && align >= 8); /* range check */
     XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
@@ -4752,7 +6088,17 @@ static void XXH_alignedFree(void* p)
         XXH_free(base);
     }
 }
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Allocate an @ref XXH3_state_t.
+ *
+ * @return An allocated pointer of @ref XXH3_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH3_freeState().
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
 XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
 {
     XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
@@ -4761,16 +6107,27 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
     return state;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Frees an @ref XXH3_state_t.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note Must be allocated with XXH3_createState().
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
 {
     XXH_alignedFree(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
+XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
 {
     XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
 }
@@ -4802,18 +6159,18 @@ XXH3_reset_internal(XXH3_state_t* statePtr,
     statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset(XXH3_state_t* statePtr)
+XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
 {
     if (statePtr == NULL) return XXH_ERROR;
     XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     if (statePtr == NULL) return XXH_ERROR;
     XXH3_reset_internal(statePtr, 0, secret, secretSize);
@@ -4822,9 +6179,9 @@ XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
     if (statePtr == NULL) return XXH_ERROR;
     if (seed==0) return XXH3_64bits_reset(statePtr);
@@ -4834,9 +6191,9 @@ XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
 {
     if (statePtr == NULL) return XXH_ERROR;
     if (secret == NULL) return XXH_ERROR;
@@ -4846,35 +6203,61 @@ XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret,
     return XXH_OK;
 }
 
-/* Note : when XXH3_consumeStripes() is invoked,
- * there must be a guarantee that at least one more byte must be consumed from input
- * so that the function can blindly consume all stripes using the "normal" secret segment */
-XXH_FORCE_INLINE void
+/*!
+ * @internal
+ * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
+ *
+ * Unlike XXH3_hashLong_internal_loop(), this can start partway through a block and cross block boundaries.
+ *
+ * @param acc                Pointer to the 8 accumulator lanes
+ * @param nbStripesSoFarPtr  In/out pointer to the number of stripes already consumed in the current block
+ * @param nbStripesPerBlock  Number of stripes in a block
+ * @param input              Input pointer
+ * @param nbStripes          Number of stripes to process
+ * @param secret             Secret pointer
+ * @param secretLimit        Offset in @p secret of the segment passed to @p f_scramble
+ * @param f_acc              Pointer to an XXH3_accumulate implementation
+ * @param f_scramble         Pointer to an XXH3_scrambleAcc implementation
+ * @return                   Pointer past the end of @p input after processing
+ */
+XXH_FORCE_INLINE const xxh_u8 *
 XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
                     size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
                     const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
                     const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
-                    XXH3_f_accumulate_512 f_acc512,
+                    XXH3_f_accumulate f_acc,
                     XXH3_f_scrambleAcc f_scramble)
 {
-    XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
-    XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
-    if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
-        /* need a scrambling operation */
-        size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
-        size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
-        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
-        f_scramble(acc, secret + secretLimit);
-        XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
-        *nbStripesSoFarPtr = nbStripesAfterBlock;
-    } else {
-        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
+    const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
+    /* Process full blocks */
+    if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
+        /* Process the initial partial block... */
+        size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
+
+        do {
+            /* Accumulate and scramble */
+            f_acc(acc, input, initialSecret, nbStripesThisIter);
+            f_scramble(acc, secret + secretLimit);
+            input += nbStripesThisIter * XXH_STRIPE_LEN;
+            nbStripes -= nbStripesThisIter;
+            /* Then continue the loop with the full block size */
+            nbStripesThisIter = nbStripesPerBlock;
+            initialSecret = secret;
+        } while (nbStripes >= nbStripesPerBlock);
+        *nbStripesSoFarPtr = 0;
+    }
+    /* Process a partial block */
+    if (nbStripes > 0) {
+        f_acc(acc, input, initialSecret, nbStripes);
+        input += nbStripes * XXH_STRIPE_LEN;
         *nbStripesSoFarPtr += nbStripes;
     }
+    /* Return end pointer */
+    return input;
 }
 
 #ifndef XXH3_STREAM_USE_STACK
-# ifndef __clang__ /* clang doesn't need additional stack space */
+# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
 #   define XXH3_STREAM_USE_STACK 1
 # endif
 #endif
@@ -4884,7 +6267,7 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
 XXH_FORCE_INLINE XXH_errorcode
 XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
             const xxh_u8* XXH_RESTRICT input, size_t len,
-            XXH3_f_accumulate_512 f_acc512,
+            XXH3_f_accumulate f_acc,
             XXH3_f_scrambleAcc f_scramble)
 {
     if (input==NULL) {
@@ -4900,7 +6283,8 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
          * when operating accumulators directly into state.
          * Operating into stack space seems to enable proper optimization.
          * clang, on the other hand, doesn't seem to need this trick */
-        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
+        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
+        XXH_memcpy(acc, state->acc, sizeof(acc));
 #else
         xxh_u64* XXH_RESTRICT const acc = state->acc;
 #endif
@@ -4908,7 +6292,7 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
         XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
 
         /* small input : just fill in tmp buffer */
-        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
+        if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
             XXH_memcpy(state->buffer + state->bufferedSize, input, len);
             state->bufferedSize += (XXH32_hash_t)len;
             return XXH_OK;
@@ -4930,57 +6314,20 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
                                &state->nbStripesSoFar, state->nbStripesPerBlock,
                                 state->buffer, XXH3_INTERNALBUFFER_STRIPES,
                                 secret, state->secretLimit,
-                                f_acc512, f_scramble);
+                                f_acc, f_scramble);
             state->bufferedSize = 0;
         }
         XXH_ASSERT(input < bEnd);
-
-        /* large input to consume : ingest per full block */
-        if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
+        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
             size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
-            XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
-            /* join to current block's end */
-            {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
-                XXH_ASSERT(nbStripesToEnd <= nbStripes);
-                XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
-                f_scramble(acc, secret + state->secretLimit);
-                state->nbStripesSoFar = 0;
-                input += nbStripesToEnd * XXH_STRIPE_LEN;
-                nbStripes -= nbStripesToEnd;
-            }
-            /* consume per entire blocks */
-            while(nbStripes >= state->nbStripesPerBlock) {
-                XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
-                f_scramble(acc, secret + state->secretLimit);
-                input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
-                nbStripes -= state->nbStripesPerBlock;
-            }
-            /* consume last partial block */
-            XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
-            input += nbStripes * XXH_STRIPE_LEN;
-            XXH_ASSERT(input < bEnd);  /* at least some bytes left */
-            state->nbStripesSoFar = nbStripes;
-            /* buffer predecessor of last partial stripe */
-            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-            XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
-        } else {
-            /* content to consume <= block size */
-            /* Consume input by a multiple of internal buffer size */
-            if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
-                const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
-                do {
-                    XXH3_consumeStripes(acc,
+            input = XXH3_consumeStripes(acc,
                                        &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                        input, XXH3_INTERNALBUFFER_STRIPES,
-                                        secret, state->secretLimit,
-                                        f_acc512, f_scramble);
-                    input += XXH3_INTERNALBUFFER_SIZE;
-                } while (input<limit);
-                /* buffer predecessor of last partial stripe */
-                XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-            }
-        }
+                                       input, nbStripes,
+                                       secret, state->secretLimit,
+                                       f_acc, f_scramble);
+            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
 
+        }
         /* Some remaining input (always) : buffer it */
         XXH_ASSERT(input < bEnd);
         XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
@@ -4989,19 +6336,19 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
         state->bufferedSize = (XXH32_hash_t)(bEnd-input);
 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
         /* save stack accumulators into state */
-        memcpy(state->acc, acc, sizeof(acc));
+        XXH_memcpy(state->acc, acc, sizeof(acc));
 #endif
     }
 
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
+XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
     return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 
@@ -5010,37 +6357,40 @@ XXH3_digest_long (XXH64_hash_t* acc,
                   const XXH3_state_t* state,
                   const unsigned char* secret)
 {
+    xxh_u8 lastStripe[XXH_STRIPE_LEN];
+    const xxh_u8* lastStripePtr;
+
     /*
      * Digest on a local copy. This way, the state remains unaltered, and it can
      * continue ingesting more input afterwards.
      */
     XXH_memcpy(acc, state->acc, sizeof(state->acc));
     if (state->bufferedSize >= XXH_STRIPE_LEN) {
+        /* Consume remaining stripes then point to remaining data in buffer */
         size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
         size_t nbStripesSoFar = state->nbStripesSoFar;
         XXH3_consumeStripes(acc,
                            &nbStripesSoFar, state->nbStripesPerBlock,
                             state->buffer, nbStripes,
                             secret, state->secretLimit,
-                            XXH3_accumulate_512, XXH3_scrambleAcc);
-        /* last stripe */
-        XXH3_accumulate_512(acc,
-                            state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
-                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+                            XXH3_accumulate, XXH3_scrambleAcc);
+        lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
     } else {  /* bufferedSize < XXH_STRIPE_LEN */
-        xxh_u8 lastStripe[XXH_STRIPE_LEN];
+        /* Copy to temp buffer */
         size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
         XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
         XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
         XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
-        XXH3_accumulate_512(acc,
-                            lastStripe,
-                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+        lastStripePtr = lastStripe;
     }
+    /* Last stripe */
+    XXH3_accumulate_512(acc,
+                        lastStripePtr,
+                        secret + state->secretLimit - XXH_SECRET_LASTACC_START);
 }
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
 {
     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
     if (state->totalLen > XXH3_MIDSIZE_MAX) {
@@ -5056,7 +6406,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
     return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                   secret, state->secretLimit + XXH_STRIPE_LEN);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 
 /* ==========================================
@@ -5076,7 +6426,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
  * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
  */
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     /* A doubled version of 1to3_64b with different constants. */
@@ -5105,7 +6455,7 @@ XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
     }
 }
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -5125,14 +6475,14 @@ XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
         m128.low64  ^= (m128.high64 >> 3);
 
         m128.low64   = XXH_xorshift64(m128.low64, 35);
-        m128.low64  *= 0x9FB21C651E98DF25ULL;
+        m128.low64  *= PRIME_MX2;
         m128.low64   = XXH_xorshift64(m128.low64, 28);
         m128.high64  = XXH3_avalanche(m128.high64);
         return m128;
     }
 }
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -5207,7 +6557,7 @@ XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64
 /*
  * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
  */
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(len <= 16);
@@ -5238,7 +6588,7 @@ XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
 }
 
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
@@ -5249,6 +6599,16 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     {   XXH128_hash_t acc;
         acc.low64 = len * XXH_PRIME64_1;
         acc.high64 = 0;
+
+#if XXH_SIZE_OPT >= 1
+        {
+            /* Smaller, but slightly slower. */
+            unsigned int i = (unsigned int)(len - 1) / 32;
+            do {
+                acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
+            } while (i-- != 0);
+        }
+#else
         if (len > 32) {
             if (len > 64) {
                 if (len > 96) {
@@ -5259,6 +6619,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
             acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
         }
         acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
+#endif
         {   XXH128_hash_t h128;
             h128.low64  = acc.low64 + acc.high64;
             h128.high64 = (acc.low64    * XXH_PRIME64_1)
@@ -5271,7 +6632,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     }
 }
 
-XXH_NO_INLINE XXH128_hash_t
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                        const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                        XXH64_hash_t seed)
@@ -5280,25 +6641,34 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
 
     {   XXH128_hash_t acc;
-        int const nbRounds = (int)len / 32;
-        int i;
+        unsigned i;
         acc.low64 = len * XXH_PRIME64_1;
         acc.high64 = 0;
-        for (i=0; i<4; i++) {
+        /*
+         *  We set as `i` as offset + 32. We do this so that unchanged
+         * `len` can be used as upper bound. This reaches a sweet spot
+         * where both x86 and aarch64 get simple agen and good codegen
+         * for the loop.
+         */
+        for (i = 32; i < 160; i += 32) {
             acc = XXH128_mix32B(acc,
-                                input  + (32 * i),
-                                input  + (32 * i) + 16,
-                                secret + (32 * i),
+                                input  + i - 32,
+                                input  + i - 16,
+                                secret + i - 32,
                                 seed);
         }
         acc.low64 = XXH3_avalanche(acc.low64);
         acc.high64 = XXH3_avalanche(acc.high64);
-        XXH_ASSERT(nbRounds >= 4);
-        for (i=4 ; i < nbRounds; i++) {
+        /*
+         * NB: `i <= len` will duplicate the last 32-bytes if
+         * len % 32 was zero. This is an unfortunate necessity to keep
+         * the hash result stable.
+         */
+        for (i=160; i <= len; i += 32) {
             acc = XXH128_mix32B(acc,
-                                input + (32 * i),
-                                input + (32 * i) + 16,
-                                secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)),
+                                input + i - 32,
+                                input + i - 16,
+                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
                                 seed);
         }
         /* last bytes */
@@ -5306,7 +6676,7 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                             input + len - 16,
                             input + len - 32,
                             secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
-                            0ULL - seed);
+                            (XXH64_hash_t)0 - seed);
 
         {   XXH128_hash_t h128;
             h128.low64  = acc.low64 + acc.high64;
@@ -5323,12 +6693,12 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
 XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
                             const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_accumulate f_acc,
                             XXH3_f_scrambleAcc f_scramble)
 {
     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
 
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
 
     /* converge into final hash */
     XXH_STATIC_ASSERT(sizeof(acc) == 64);
@@ -5346,47 +6716,50 @@ XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
 }
 
 /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance that XXH3_hashLong() is not inlined.
  */
-XXH_NO_INLINE XXH128_hash_t
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
 XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
                            XXH64_hash_t seed64,
                            const void* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64; (void)secret; (void)secretLen;
     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
-                                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
- * It's important for performance to pass @secretLen (when it's static)
+ * It's important for performance to pass @p secretLen (when it's static)
  * to the compiler, so that it can properly optimize the vectorized loop.
+ *
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
  */
-XXH_FORCE_INLINE XXH128_hash_t
+XXH3_WITH_SECRET_INLINE XXH128_hash_t
 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
                               XXH64_hash_t seed64,
                               const void* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64;
     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
-                                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
                                 XXH64_hash_t seed64,
-                                XXH3_f_accumulate_512 f_acc512,
+                                XXH3_f_accumulate f_acc,
                                 XXH3_f_scrambleAcc f_scramble,
                                 XXH3_f_initCustomSecret f_initSec)
 {
     if (seed64 == 0)
         return XXH3_hashLong_128b_internal(input, len,
                                            XXH3_kSecret, sizeof(XXH3_kSecret),
-                                           f_acc512, f_scramble);
+                                           f_acc, f_scramble);
     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         f_initSec(secret, seed64);
         return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
-                                           f_acc512, f_scramble);
+                                           f_acc, f_scramble);
     }
 }
 
@@ -5399,7 +6772,7 @@ XXH3_hashLong_128b_withSeed(const void* input, size_t len,
 {
     (void)secret; (void)secretLen;
     return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
-                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
 }
 
 typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
@@ -5429,94 +6802,93 @@ XXH3_128bits_internal(const void* input, size_t len,
 
 /* ===   Public XXH128 API   === */
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
 {
     return XXH3_128bits_internal(input, len, 0,
                                  XXH3_kSecret, sizeof(XXH3_kSecret),
                                  XXH3_hashLong_128b_default);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     return XXH3_128bits_internal(input, len, 0,
                                  (const xxh_u8*)secret, secretSize,
                                  XXH3_hashLong_128b_withSecret);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
     return XXH3_128bits_internal(input, len, seed,
                                  XXH3_kSecret, sizeof(XXH3_kSecret),
                                  XXH3_hashLong_128b_withSeed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
     if (len <= XXH3_MIDSIZE_MAX)
         return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
     return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH128(const void* input, size_t len, XXH64_hash_t seed)
+XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
     return XXH3_128bits_withSeed(input, len, seed);
 }
 
 
 /* ===   XXH3 128-bit streaming   === */
-
+#ifndef XXH_NO_STREAM
 /*
  * All initialization and update functions are identical to 64-bit streaming variant.
  * The only difference is the finalization routine.
  */
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset(XXH3_state_t* statePtr)
+XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
 {
     return XXH3_64bits_reset(statePtr);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
     return XXH3_64bits_reset_withSeed(statePtr, seed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
     return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
+XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
-    return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_64bits_update(state, input, len);
 }
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
 {
     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
     if (state->totalLen > XXH3_MIDSIZE_MAX) {
@@ -5540,13 +6912,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
     return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                    secret, state->secretLimit + XXH_STRIPE_LEN);
 }
-
+#endif /* !XXH_NO_STREAM */
 /* 128-bit utility functions */
 
 #include <string.h>   /* memcmp, memcpy */
 
 /* return : 1 is equal, 0 if different */
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
 {
     /* note : XXH128_hash_t is compact, it has no padding byte */
@@ -5554,11 +6926,11 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
 }
 
 /* This prototype is compatible with stdlib's qsort().
- * return : >0 if *h128_1  > *h128_2
- *          <0 if *h128_1  < *h128_2
- *          =0 if *h128_1 == *h128_2  */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
+ * @return : >0 if *h128_1  > *h128_2
+ *           <0 if *h128_1  < *h128_2
+ *           =0 if *h128_1 == *h128_2  */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
 {
     XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
     XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
@@ -5570,9 +6942,9 @@ XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
 
 
 /*======   Canonical representation   ======*/
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
+XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
 {
     XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) {
@@ -5583,9 +6955,9 @@ XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
     XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(const XXH128_canonical_t* src)
+XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
 {
     XXH128_hash_t h;
     h.high64 = XXH_readBE64(src);
@@ -5607,9 +6979,9 @@ XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
     XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
+XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
 {
 #if (XXH_DEBUGLEVEL >= 1)
     XXH_ASSERT(secretBuffer != NULL);
@@ -5652,9 +7024,9 @@ XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSee
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
+XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
 {
     XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
     XXH3_initCustomSecret(secret, seed);
@@ -5667,7 +7039,7 @@ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
 /* Pop our optimization override from above */
 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
   && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
 #  pragma GCC pop_options
 #endif
 
@@ -5682,5 +7054,5 @@ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
 
 
 #if defined (__cplusplus)
-}
+} /* extern "C" */
 #endif

From 3fd5f9f52dff5e4e8a9afcf9afb1abc946844535 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 13 Nov 2023 15:46:59 -0800
Subject: [PATCH 118/283] fix the copyright linter

---
 lib/common/xxhash.c | 2 +-
 lib/common/xxhash.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index 1a8f735ba2d..50640c76539 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -1,6 +1,6 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
- * Copyright (C) 2012-2023 Yann Collet
+ * Copyright (c) Yann Collet - Meta Platforms, Inc
  *
  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  *
diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index fe3abc1b461..2abdc07349b 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -1,7 +1,7 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (C) 2012-2023 Yann Collet
+ * Copyright (c) Yann Collet - Meta Platforms, Inc
  *
  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  *

From 59dcc475798b3e522be8cd3ba41a170b34c10d63 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 16 Nov 2023 16:19:25 -0800
Subject: [PATCH 119/283] update license text

---
 lib/common/xxhash.c | 37 ++++++-------------------------------
 lib/common/xxhash.h | 32 ++++----------------------------
 2 files changed, 10 insertions(+), 59 deletions(-)

diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index 50640c76539..052cd522824 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -2,42 +2,17 @@
  * xxHash - Extremely Fast Hash algorithm
  * Copyright (c) Yann Collet - Meta Platforms, Inc
  *
- * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above
- *      copyright notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other materials provided with the
- *      distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at:
- *   - xxHash homepage: https://www.xxhash.com
- *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
-
 /*
  * xxhash.c instantiates functions defined in xxhash.h
  */
 
-#define XXH_STATIC_LINKING_ONLY   /* access advanced declarations */
-#define XXH_IMPLEMENTATION   /* access definitions */
+#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
+#define XXH_IMPLEMENTATION      /* access definitions */
 
 #include "xxhash.h"
diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index 2abdc07349b..424ed19b8a0 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -3,34 +3,10 @@
  * Header File
  * Copyright (c) Yann Collet - Meta Platforms, Inc
  *
- * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above
- *      copyright notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other materials provided with the
- *      distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at:
- *   - xxHash homepage: https://www.xxhash.com
- *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 /* Local adaptations for Zstandard */

From a419265d30f4fa05caa8df0b12fac1ce2558ec6a Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Thu, 16 Nov 2023 12:00:11 -0800
Subject: [PATCH 120/283] [linux] Backport intptr_t removal

Linux started providing intptr_t in <linux/types.h> so we no longer need
to define it here.

https://lkml.kernel.org/r/ed66b9e4-1fb7-45be-9bb9-d4bc291c691f@p183
---
 contrib/linux-kernel/zstd_deps.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/contrib/linux-kernel/zstd_deps.h b/contrib/linux-kernel/zstd_deps.h
index 670c5fa2a95..f931f7d0e29 100644
--- a/contrib/linux-kernel/zstd_deps.h
+++ b/contrib/linux-kernel/zstd_deps.h
@@ -115,11 +115,7 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
 #ifndef ZSTD_DEPS_STDINT
 #define ZSTD_DEPS_STDINT
 
-/*
- * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which
- * is an unsigned long.
- */
-typedef long intptr_t;
+/* intptr_t already provided by ZSTD_DEPS_COMMON */
 
 #endif /* ZSTD_DEPS_STDINT */
 #endif /* ZSTD_DEPS_NEED_STDINT */

From c2d470581eaee3dc9f747dbab16d1fc0816f94aa Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Thu, 16 Nov 2023 16:53:44 -0800
Subject: [PATCH 121/283] [linux] Remove usage of deprecated function

ZSTD_resetDStream() is deprecated and replaced by ZSTD_DCtx_reset().
This removes deprecation warnings from the kernel build.

This change is a no-op, see the docs suggesting this replacement.

https://github.com/facebook/zstd/blob/fcbf2fde9ac7ce1562c7b3a394350e764bcb580f/lib/zstd.h#L2655-L2663
---
 contrib/linux-kernel/zstd_decompress_module.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/linux-kernel/zstd_decompress_module.c b/contrib/linux-kernel/zstd_decompress_module.c
index eb1c49e6972..7d31518e9d5 100644
--- a/contrib/linux-kernel/zstd_decompress_module.c
+++ b/contrib/linux-kernel/zstd_decompress_module.c
@@ -77,7 +77,7 @@ EXPORT_SYMBOL(zstd_init_dstream);
 
 size_t zstd_reset_dstream(zstd_dstream *dstream)
 {
-	return ZSTD_resetDStream(dstream);
+	return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only);
 }
 EXPORT_SYMBOL(zstd_reset_dstream);
 

From e122fcbf58e142e837a2bba382ef7ca4f5eaa13b Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Thu, 16 Nov 2023 17:15:25 -0800
Subject: [PATCH 122/283] [debug] Don't define g_debuglevel in the kernel

We only use this constant when `DEBUGLEVEL>=2`, but we get
-Werror=pedantic errors for empty translation units, so still define it
except in kernel environments.

Backport from the kernel:

https://lore.kernel.org/lkml/20230616144400.172683-1-ben.dooks@codethink.co.uk/
---
 lib/common/debug.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/common/debug.c b/lib/common/debug.c
index ebf7bfccfa6..9d0b7d229c1 100644
--- a/lib/common/debug.c
+++ b/lib/common/debug.c
@@ -21,4 +21,10 @@
 
 #include "debug.h"
 
+#if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL>=2)
+/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
+ * translation unit is empty. So remove this from Linux kernel builds, but
+ * otherwise just leave it in.
+ */
 int g_debuglevel = DEBUGLEVEL;
+#endif

From c7269add7eaf028ed828d9af41e732cf01993aad Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Fri, 17 Nov 2023 18:20:19 -0800
Subject: [PATCH 123/283] [huf] Improve fast huffman decoding speed in linux
 kernel

gcc in the linux kernel was not unrolling the inner loops of the Huffman
decoder, which was destroying decoding performance. The compiler was
generating crazy code with all sorts of branches. I suspect because of
Spectre mitigations, but I'm not certain. Once the loops were manually
unrolled, performance was restored.

Additionally, when gcc couldn't prove that the variable left shift in
the 4X2 decode loop wasn't greater than 63, it inserted checks to verify
it. To fix this, mask `entry.nbBits & 0x3F`, which allows gcc to elide
this check. This is a no-op, because `entry.nbBits` is guaranteed to be
less than 64.

Lastly, introduce the `HUF_DISABLE_FAST_DECODE` macro to disable the
fast C loops for Issue #3762. So if even after this change, there is a
performance regression, users can opt-out at compile time.
---
 lib/README.md                   |   4 +
 lib/decompress/huf_decompress.c | 171 ++++++++++++++++++++------------
 2 files changed, 109 insertions(+), 66 deletions(-)

diff --git a/lib/README.md b/lib/README.md
index 572b7df78a1..a560f06cada 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -178,6 +178,10 @@ The file structure is designed to make this selection manually achievable for an
   `ZSTDERRORLIB_VSIBILITY`, and `ZDICTLIB_VISIBILITY` if unset, for backwards compatibility
   with the old macro names.
 
+- The C compiler macro `HUF_DISABLE_FAST_DECODE` disables the newer Huffman fast C
+  and assembly decoding loops. You may want to use this macro if these loops are
+  slower on your platform.
+
 #### Windows : using MinGW+MSYS to create DLL
 
 DLL can be created using MinGW+MSYS with the `make libzstd` command.
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 15e4204024d..5bbdef49ad5 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -34,6 +34,12 @@
 *  Macros
 ****************************************************************/
 
+#ifdef HUF_DISABLE_FAST_DECODE
+# define HUF_ENABLE_FAST_DECODE 0
+#else
+# define HUF_ENABLE_FAST_DECODE 1
+#endif
+
 /* These two optional macros force the use one way or another of the two
  * Huffman decompression implementations. You can't force in both directions
  * at the same time.
@@ -292,6 +298,24 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArg
     return 0;
 }
 
+/* Calls X(N) for each stream 0, 1, 2, 3. */
+#define HUF_4X_FOR_EACH_STREAM(X) \
+    {                             \
+        X(0)                      \
+        X(1)                      \
+        X(2)                      \
+        X(3)                      \
+    }
+
+/* Calls X(N, var) for each stream 0, 1, 2, 3. */
+#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
+    {                                           \
+        X(0, (var))                             \
+        X(1, (var))                             \
+        X(2, (var))                             \
+        X(3, (var))                             \
+    }
+
 
 #ifndef HUF_FORCE_DECOMPRESS_X2
 
@@ -706,7 +730,6 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
     for (;;) {
         BYTE* olimit;
         int stream;
-        int symbol;
 
         /* Assert loop preconditions */
 #ifndef NDEBUG
@@ -753,27 +776,42 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
         }
 #endif
 
+#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol)                 \
+    {                                                           \
+        int const index = (int)(bits[(_stream)] >> 53);         \
+        int const entry = (int)dtable[index];                   \
+        bits[(_stream)] <<= (entry & 0x3F);                     \
+        op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
+    }
+
+#define HUF_4X1_RELOAD_STREAM(_stream)                              \
+    {                                                               \
+        int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
+        int const nbBits = ctz & 7;                                 \
+        int const nbBytes = ctz >> 3;                               \
+        op[(_stream)] += 5;                                         \
+        ip[(_stream)] -= nbBytes;                                   \
+        bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
+        bits[(_stream)] <<= nbBits;                                 \
+    }
+
+        /* Manually unroll the loop because compilers don't consistently
+         * unroll the inner loops, which destroys performance.
+         */
         do {
             /* Decode 5 symbols in each of the 4 streams */
-            for (symbol = 0; symbol < 5; ++symbol) {
-                for (stream = 0; stream < 4; ++stream) {
-                    int const index = (int)(bits[stream] >> 53);
-                    int const entry = (int)dtable[index];
-                    bits[stream] <<= (entry & 63);
-                    op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
-                }
-            }
-            /* Reload the bitstreams */
-            for (stream = 0; stream < 4; ++stream) {
-                int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
-                int const nbBits = ctz & 7;
-                int const nbBytes = ctz >> 3;
-                op[stream] += 5;
-                ip[stream] -= nbBytes;
-                bits[stream] = MEM_read64(ip[stream]) | 1;
-                bits[stream] <<= nbBits;
-            }
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
+
+            /* Reload each of the 4 the bitstreams */
+            HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
         } while (op[3] < olimit);
+
+#undef HUF_4X1_DECODE_SYMBOL
+#undef HUF_4X1_RELOAD_STREAM
     }
 
 _out:
@@ -869,7 +907,7 @@ static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize,
     }
 #endif
 
-    if (!(flags & HUF_flags_disableFast)) {
+    if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
         size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
         if (ret != 0)
             return ret;
@@ -1492,7 +1530,6 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
     for (;;) {
         BYTE* olimit;
         int stream;
-        int symbol;
 
         /* Assert loop preconditions */
 #ifndef NDEBUG
@@ -1549,54 +1586,56 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
         }
 #endif
 
+#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3)        \
+    if ((_decode3) || (_stream) != 3) {                 \
+        int const index = (int)(bits[(_stream)] >> 53); \
+        HUF_DEltX2 const entry = dtable[index];         \
+        MEM_write16(op[(_stream)], entry.sequence);     \
+        bits[(_stream)] <<= (entry.nbBits) & 0x3F;      \
+        op[(_stream)] += (entry.length);                \
+    }
+
+#define HUF_4X2_RELOAD_STREAM(_stream)                                  \
+    {                                                                   \
+        HUF_4X2_DECODE_SYMBOL(3, 1)                                     \
+        {                                                               \
+            int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
+            int const nbBits = ctz & 7;                                 \
+            int const nbBytes = ctz >> 3;                               \
+            ip[(_stream)] -= nbBytes;                                   \
+            bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
+            bits[(_stream)] <<= nbBits;                                 \
+        }                                                               \
+    }
+
+        /* Manually unroll the loop because compilers don't consistently
+         * unroll the inner loops, which destroys performance.
+         */
         do {
-            /* Do 5 table lookups for each of the first 3 streams */
-            for (symbol = 0; symbol < 5; ++symbol) {
-                for (stream = 0; stream < 3; ++stream) {
-                    int const index = (int)(bits[stream] >> 53);
-                    HUF_DEltX2 const entry = dtable[index];
-                    MEM_write16(op[stream], entry.sequence);
-                    bits[stream] <<= (entry.nbBits);
-                    op[stream] += (entry.length);
-                }
-            }
-            /* Do 1 table lookup from the final stream */
-            {
-                int const index = (int)(bits[3] >> 53);
-                HUF_DEltX2 const entry = dtable[index];
-                MEM_write16(op[3], entry.sequence);
-                bits[3] <<= (entry.nbBits);
-                op[3] += (entry.length);
-            }
-            /* Do 4 table lookups from the final stream & reload bitstreams */
-            for (stream = 0; stream < 4; ++stream) {
-                /* Do a table lookup from the final stream.
-                 * This is interleaved with the reloading to reduce register
-                 * pressure. This shouldn't be necessary, but compilers can
-                 * struggle with codegen with high register pressure.
-                 */
-                {
-                    int const index = (int)(bits[3] >> 53);
-                    HUF_DEltX2 const entry = dtable[index];
-                    MEM_write16(op[3], entry.sequence);
-                    bits[3] <<= (entry.nbBits);
-                    op[3] += (entry.length);
-                }
-                /* Reload the bistreams. The final bitstream must be reloaded
-                 * after the 5th symbol was decoded.
-                 */
-                {
-                    int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
-                    int const nbBits = ctz & 7;
-                    int const nbBytes = ctz >> 3;
-                    ip[stream] -= nbBytes;
-                    bits[stream] = MEM_read64(ip[stream]) | 1;
-                    bits[stream] <<= nbBits;
-                }
-            }
+            /* Decode 5 symbols from each of the first 3 streams.
+             * The final stream will be decoded during the reload phase
+             * to reduce register pressure.
+             */
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
+
+            /* Decode one symbol from the final stream */
+            HUF_4X2_DECODE_SYMBOL(3, 1)
+
+            /* Decode 4 symbols from the final stream & reload bitstreams.
+             * The final stream is reloaded last, meaning that all 5 symbols
+             * are decoded from the final stream before it is reloaded.
+             */
+            HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
         } while (op[3] < olimit);
     }
 
+#undef HUF_4X2_DECODE_SYMBOL
+#undef HUF_4X2_RELOAD_STREAM
+
 _out:
 
     /* Save the final values of each of the state variables back to args. */
@@ -1681,7 +1720,7 @@ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize,
     }
 #endif
 
-    if (!(flags & HUF_flags_disableFast)) {
+    if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
         size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
         if (ret != 0)
             return ret;

From 5ab78c0418dd2b77e76e8350a563b9771a424b27 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Mon, 20 Nov 2023 11:33:57 -0800
Subject: [PATCH 124/283] [huf] Improve fast C & ASM performance on small data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Rename `ilimit` to `ilowest` and set it equal to `src` instead of
  `src + 6 + 8`. This is safe because the fast decoding loops guarantee
  to never read below `ilowest` already. This allows the fast decoder to
  run for at least two more iterations, because it consumes at most 7
  bytes per iteration.
* Continue the fast loop all the way until the number of safe iterations
 is 0. Initially, I thought that when it got towards the end, the
 computation of how many iterations are safe might become expensive. But
 it ends up being slower to have to decode each of the 4 streams
 individually, which makes sense.

This drastically speeds up the Huffman decoder on the `github` dataset
for the issue raised in #3762, measured with `zstd -b1e1r github/`.

| Decoder  | Speed before | Speed after |
|----------|--------------|-------------|
| Fallback | 477 MB/s     | 477 MB/s    |
| Fast C   | 384 MB/s     | 492 MB/s    |
| Assembly | 385 MB/s     | 501 MB/s    |

We can also look at the speed delta for different block sizes of silesia
using `zstd -b1e1r silesia.tar -B#`.

| Decoder  | -B1K ∆ | -B2K ∆ | -B4K ∆ | -B8K ∆ | -B16K ∆ | -B32K ∆ | -B64K ∆ | -B128K ∆ |
|----------|--------|--------|--------|--------|---------|---------|---------|----------|
| Fast C   | +11.2% | +8.2%  | +6.1%  | +4.4%  | +2.7%   | +1.5%   | +0.6%   | +0.2%    |
| Assembly | +12.5% | +9.0%  | +6.2%  | +3.6%  | +1.5%   | +0.7%   | +0.2%   | +0.03%   |
---
 lib/decompress/huf_decompress.c       | 84 ++++++++++++++-------------
 lib/decompress/huf_decompress_amd64.S | 34 +++++------
 2 files changed, 61 insertions(+), 57 deletions(-)

diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 5bbdef49ad5..b8795efb552 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -164,17 +164,18 @@ static size_t HUF_initFastDStream(BYTE const* ip) {
  * op [in/out] - The output pointers, must be updated to reflect what is written.
  * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
  * dt [in] - The decoding table.
- * ilimit [in] - The input limit, stop when any input pointer is below ilimit.
+ * ilowest [in] - The beginning of the valid range of the input. Decoders may read
+ *                down to this pointer. It may be below iend[0].
  * oend [in] - The end of the output stream. op[3] must not cross oend.
  * iend [in] - The end of each input stream. ip[i] may cross iend[i],
- *             as long as it is above ilimit, but that indicates corruption.
+ *             as long as it is above ilowest, but that indicates corruption.
  */
 typedef struct {
     BYTE const* ip[4];
     BYTE* op[4];
     U64 bits[4];
     void const* dt;
-    BYTE const* ilimit;
+    BYTE const* ilowest;
     BYTE* oend;
     BYTE const* iend[4];
 } HUF_DecompressFastArgs;
@@ -192,7 +193,7 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
     void const* dt = DTable + 1;
     U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
 
-    const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
+    const BYTE* const istart = (const BYTE*)src;
 
     BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
 
@@ -215,7 +216,6 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
 
     /* Read the jump table. */
     {
-        const BYTE* const istart = (const BYTE*)src;
         size_t const length1 = MEM_readLE16(istart);
         size_t const length2 = MEM_readLE16(istart+2);
         size_t const length3 = MEM_readLE16(istart+4);
@@ -227,10 +227,8 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
 
         /* HUF_initFastDStream() requires this, and this small of an input
          * won't benefit from the ASM loop anyways.
-         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
-         * starts.
          */
-        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
+        if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
             return 0;
         if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
     }
@@ -262,11 +260,12 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
     args->bits[2] = HUF_initFastDStream(args->ip[2]);
     args->bits[3] = HUF_initFastDStream(args->ip[3]);
 
-    /* If ip[] >= ilimit, it is guaranteed to be safe to
-        * reload bits[]. It may be beyond its section, but is
-        * guaranteed to be valid (>= istart).
-        */
-    args->ilimit = ilimit;
+    /* The decoders must be sure to never read beyond ilowest.
+     * This is lower than iend[0], but allowing decoders to read
+     * down to ilowest can allow an extra iteration or two in the
+     * fast loop.
+     */
+    args->ilowest = istart;
 
     args->oend = oend;
     args->dt = dt;
@@ -291,7 +290,7 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArg
     assert(sizeof(size_t) == 8);
     bit->bitContainer = MEM_readLEST(args->ip[stream]);
     bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
-    bit->start = (const char*)args->iend[0];
+    bit->start = (const char*)args->ilowest;
     bit->limitPtr = bit->start + sizeof(size_t);
     bit->ptr = (const char*)args->ip[stream];
 
@@ -717,7 +716,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
     BYTE* op[4];
     U16 const* const dtable = (U16 const*)args->dt;
     BYTE* const oend = args->oend;
-    BYTE const* const ilimit = args->ilimit;
+    BYTE const* const ilowest = args->ilowest;
 
     /* Copy the arguments to local variables */
     ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
@@ -735,7 +734,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
 #ifndef NDEBUG
         for (stream = 0; stream < 4; ++stream) {
             assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
-            assert(ip[stream] >= ilimit);
+            assert(ip[stream] >= ilowest);
         }
 #endif
         /* Compute olimit */
@@ -745,7 +744,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
             /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
              * per stream.
              */
-            size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
+            size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
             /* We can safely run iters iterations before running bounds checks */
             size_t const iters = MIN(oiters, iiters);
             size_t const symbols = iters * 5;
@@ -756,8 +755,8 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
              */
             olimit = op[3] + symbols;
 
-            /* Exit fast decoding loop once we get close to the end. */
-            if (op[3] + 20 > olimit)
+            /* Exit fast decoding loop once we reach the end. */
+            if (op[3] == olimit)
                 break;
 
             /* Exit the decoding loop if any input pointer has crossed the
@@ -836,7 +835,7 @@ HUF_decompress4X1_usingDTable_internal_fast(
     HUF_DecompressFastLoopFn loopFn)
 {
     void const* dt = DTable + 1;
-    const BYTE* const iend = (const BYTE*)cSrc + 6;
+    BYTE const* const ilowest = (BYTE const*)cSrc;
     BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
     HUF_DecompressFastArgs args;
     {   size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
@@ -845,18 +844,22 @@ HUF_decompress4X1_usingDTable_internal_fast(
             return 0;
     }
 
-    assert(args.ip[0] >= args.ilimit);
+    assert(args.ip[0] >= args.ilowest);
     loopFn(&args);
 
-    /* Our loop guarantees that ip[] >= ilimit and that we haven't
+    /* Our loop guarantees that ip[] >= ilowest and that we haven't
     * overwritten any op[].
     */
-    assert(args.ip[0] >= iend);
-    assert(args.ip[1] >= iend);
-    assert(args.ip[2] >= iend);
-    assert(args.ip[3] >= iend);
+    assert(args.ip[0] >= ilowest);
+    assert(args.ip[0] >= ilowest);
+    assert(args.ip[1] >= ilowest);
+    assert(args.ip[2] >= ilowest);
+    assert(args.ip[3] >= ilowest);
     assert(args.op[3] <= oend);
-    (void)iend;
+
+    assert(ilowest == args.ilowest);
+    assert(ilowest + 6 == args.iend[0]);
+    (void)ilowest;
 
     /* finish bit streams one by one. */
     {   size_t const segmentSize = (dstSize+3) / 4;
@@ -1512,7 +1515,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
     BYTE* op[4];
     BYTE* oend[4];
     HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
-    BYTE const* const ilimit = args->ilimit;
+    BYTE const* const ilowest = args->ilowest;
 
     /* Copy the arguments to local registers. */
     ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
@@ -1535,7 +1538,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
 #ifndef NDEBUG
         for (stream = 0; stream < 4; ++stream) {
             assert(op[stream] <= oend[stream]);
-            assert(ip[stream] >= ilimit);
+            assert(ip[stream] >= ilowest);
         }
 #endif
         /* Compute olimit */
@@ -1548,7 +1551,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
              * We also know that each input pointer is >= ip[0]. So we can run
              * iters loops before running out of input.
              */
-            size_t iters = (size_t)(ip[0] - ilimit) / 7;
+            size_t iters = (size_t)(ip[0] - ilowest) / 7;
             /* Each iteration can produce up to 10 bytes of output per stream.
              * Each output stream my advance at different rates. So take the
              * minimum number of safe iterations among all the output streams.
@@ -1566,8 +1569,8 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
              */
             olimit = op[3] + (iters * 5);
 
-            /* Exit the fast decoding loop if we are too close to the end. */
-            if (op[3] + 10 > olimit)
+            /* Exit the fast decoding loop once we reach the end. */
+            if (op[3] == olimit)
                 break;
 
             /* Exit the decoding loop if any input pointer has crossed the
@@ -1652,7 +1655,7 @@ HUF_decompress4X2_usingDTable_internal_fast(
     const HUF_DTable* DTable,
     HUF_DecompressFastLoopFn loopFn) {
     void const* dt = DTable + 1;
-    const BYTE* const iend = (const BYTE*)cSrc + 6;
+    const BYTE* const ilowest = (const BYTE*)cSrc;
     BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
     HUF_DecompressFastArgs args;
     {
@@ -1662,16 +1665,19 @@ HUF_decompress4X2_usingDTable_internal_fast(
             return 0;
     }
 
-    assert(args.ip[0] >= args.ilimit);
+    assert(args.ip[0] >= args.ilowest);
     loopFn(&args);
 
     /* note : op4 already verified within main loop */
-    assert(args.ip[0] >= iend);
-    assert(args.ip[1] >= iend);
-    assert(args.ip[2] >= iend);
-    assert(args.ip[3] >= iend);
+    assert(args.ip[0] >= ilowest);
+    assert(args.ip[1] >= ilowest);
+    assert(args.ip[2] >= ilowest);
+    assert(args.ip[3] >= ilowest);
     assert(args.op[3] <= oend);
-    (void)iend;
+
+    assert(ilowest == args.ilowest);
+    assert(ilowest + 6 == args.iend[0]);
+    (void)ilowest;
 
     /* finish bitStreams one by one */
     {
diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S
index 671624fe343..3b96b44612f 100644
--- a/lib/decompress/huf_decompress_amd64.S
+++ b/lib/decompress/huf_decompress_amd64.S
@@ -131,7 +131,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
     movq 88(%rax), %bits3
     movq 96(%rax), %dtable
     push %rax      /* argument */
-    push 104(%rax) /* ilimit */
+    push 104(%rax) /* ilowest */
     push 112(%rax) /* oend */
     push %olimit   /* olimit space */
 
@@ -156,11 +156,11 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
     shrq $2, %r15
 
     movq %ip0,     %rax /* rax = ip0 */
-    movq 40(%rsp), %rdx /* rdx = ilimit */
-    subq %rdx,     %rax /* rax = ip0 - ilimit */
-    movq %rax,     %rbx /* rbx = ip0 - ilimit */
+    movq 40(%rsp), %rdx /* rdx = ilowest */
+    subq %rdx,     %rax /* rax = ip0 - ilowest */
+    movq %rax,     %rbx /* rbx = ip0 - ilowest */
 
-    /* rdx = (ip0 - ilimit) / 7 */
+    /* rdx = (ip0 - ilowest) / 7 */
     movabsq $2635249153387078803, %rdx
     mulq %rdx
     subq %rdx, %rbx
@@ -183,9 +183,8 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
 
     /* If (op3 + 20 > olimit) */
     movq %op3, %rax    /* rax = op3 */
-    addq $20,  %rax    /* rax = op3 + 20 */
-    cmpq %rax, %olimit /* op3 + 20 > olimit */
-    jb .L_4X1_exit
+    cmpq %rax, %olimit /* op3 == olimit */
+    je .L_4X1_exit
 
     /* If (ip1 < ip0) go to exit */
     cmpq %ip0, %ip1
@@ -316,7 +315,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
     /* Restore stack (oend & olimit) */
     pop %rax /* olimit */
     pop %rax /* oend */
-    pop %rax /* ilimit */
+    pop %rax /* ilowest */
     pop %rax /* arg */
 
     /* Save ip / op / bits */
@@ -387,7 +386,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
     movq 96(%rax), %dtable
     push %rax      /* argument */
     push %rax      /* olimit */
-    push 104(%rax) /* ilimit */
+    push 104(%rax) /* ilowest */
 
     movq 112(%rax), %rax
     push %rax /* oend3 */
@@ -414,9 +413,9 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
 
     /* We can consume up to 7 input bytes each iteration. */
     movq %ip0,     %rax  /* rax = ip0 */
-    movq 40(%rsp), %rdx  /* rdx = ilimit */
-    subq %rdx,     %rax  /* rax = ip0 - ilimit */
-    movq %rax,    %r15   /* r15 = ip0 - ilimit */
+    movq 40(%rsp), %rdx  /* rdx = ilowest */
+    subq %rdx,     %rax  /* rax = ip0 - ilowest */
+    movq %rax,    %r15   /* r15 = ip0 - ilowest */
 
     /* rdx = rax / 7 */
     movabsq $2635249153387078803, %rdx
@@ -426,7 +425,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
     addq %r15, %rdx
     shrq $2, %rdx
 
-    /* r15 = (ip0 - ilimit) / 7 */
+    /* r15 = (ip0 - ilowest) / 7 */
     movq %rdx, %r15
 
     /* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
@@ -467,9 +466,8 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
 
     /* If (op3 + 10 > olimit) */
     movq %op3, %rax    /* rax = op3 */
-    addq $10,  %rax    /* rax = op3 + 10 */
-    cmpq %rax, %olimit /* op3 + 10 > olimit */
-    jb .L_4X2_exit
+    cmpq %rax, %olimit /* op3 == olimit */
+    je .L_4X2_exit
 
     /* If (ip1 < ip0) go to exit */
     cmpq %ip0, %ip1
@@ -537,7 +535,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
     pop %rax /* oend1 */
     pop %rax /* oend2 */
     pop %rax /* oend3 */
-    pop %rax /* ilimit */
+    pop %rax /* ilowest */
     pop %rax /* olimit */
     pop %rax /* arg */
 

From dd4de1dd7a78ccff933025cf1de08a75d310802b Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Mon, 20 Nov 2023 12:04:30 -0800
Subject: [PATCH 125/283] [huf] Fix null pointer addition

`HUF_DecompressFastArgs_init()` was adding 0 to NULL. Fix it by exiting
early for empty outputs. This is no change in behavior, because the
function was already exiting 0 in this case, just slightly later.
---
 lib/decompress/huf_decompress.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index b8795efb552..0c43c656de8 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -203,6 +203,11 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
     if (!MEM_isLittleEndian() || MEM_32bits())
         return 0;
 
+    /* Avoid nullptr addition */
+    if (dstSize == 0)
+        return 0;
+    assert(dst != NULL);
+
     /* strict minimum : jump table + 1 byte per stream */
     if (srcSize < 10)
         return ERROR(corruption_detected);

From 8193250615f56ace446a3bf963d195f9f33fa9a9 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Tue, 21 Nov 2023 13:26:25 -0800
Subject: [PATCH 126/283] Modernize macros to use `do { } while (0)`

This PR introduces no functional changes. It attempts to change all
macros currently using `{ }` or some variant of that to
`do { } while (0)`, and introduces trailing `;` where necessary.
There were no bugs found during this migration.

The Visual Studio bug that caused a warning on this construct has been
fixed since VS2015. Additionally, we have several instances of
`do { } while (0)` which have been present for several releases, so we
don't have to worry about breaking people's builds.

Fixes Issue #3830.
---
 lib/common/compiler.h            |  33 ++++-----
 lib/common/debug.h               |  27 ++++---
 lib/common/error_private.h       |  81 +++++++++++----------
 lib/common/zstd_internal.h       |   6 +-
 lib/compress/zstd_compress.c     |  20 +++---
 lib/compress/zstd_double_fast.c  |   4 +-
 lib/compress/zstd_fast.c         |   2 +-
 lib/compress/zstd_opt.c          |   4 +-
 lib/compress/zstdmt_compress.c   |  61 ++++++++--------
 lib/decompress/huf_decompress.c  | 118 +++++++++++++++++--------------
 lib/decompress/zstd_decompress.c |   2 +-
 lib/dictBuilder/zdict.c          |  18 +++--
 12 files changed, 207 insertions(+), 169 deletions(-)

diff --git a/lib/common/compiler.h b/lib/common/compiler.h
index df39d91c6e0..31880ecbe16 100644
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -132,8 +132,8 @@
 /* prefetch
  * can be disabled, by declaring NO_PREFETCH build macro */
 #if defined(NO_PREFETCH)
-#  define PREFETCH_L1(ptr)  (void)(ptr)  /* disabled */
-#  define PREFETCH_L2(ptr)  (void)(ptr)  /* disabled */
+#  define PREFETCH_L1(ptr)  do { (void)(ptr); } while (0)  /* disabled */
+#  define PREFETCH_L2(ptr)  do { (void)(ptr); } while (0)  /* disabled */
 #else
 #  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC)  /* _mm_prefetch() is not defined outside of x86/x64 */
 #    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
@@ -143,24 +143,25 @@
 #    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
 #    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
 #  elif defined(__aarch64__)
-#    define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
-#    define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+#    define PREFETCH_L1(ptr)  do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
+#    define PREFETCH_L2(ptr)  do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
 #  else
-#    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
-#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
+#    define PREFETCH_L1(ptr) do { (void)(ptr); } while (0)  /* disabled */
+#    define PREFETCH_L2(ptr) do { (void)(ptr); } while (0)  /* disabled */
 #  endif
 #endif  /* NO_PREFETCH */
 
 #define CACHELINE_SIZE 64
 
-#define PREFETCH_AREA(p, s)  {            \
-    const char* const _ptr = (const char*)(p);  \
-    size_t const _size = (size_t)(s);     \
-    size_t _pos;                          \
-    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
-        PREFETCH_L2(_ptr + _pos);         \
-    }                                     \
-}
+#define PREFETCH_AREA(p, s)                              \
+    do {                                                 \
+        const char* const _ptr = (const char*)(p);       \
+        size_t const _size = (size_t)(s);                \
+        size_t _pos;                                     \
+        for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+            PREFETCH_L2(_ptr + _pos);                    \
+        }                                                \
+    } while (0)
 
 /* vectorization
  * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
@@ -189,9 +190,9 @@
 #endif
 
 #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
-#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
+#  define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
 #else
-#  define ZSTD_UNREACHABLE { assert(0); }
+#  define ZSTD_UNREACHABLE do { assert(0); } while (0)
 #endif
 
 /* disable warnings */
diff --git a/lib/common/debug.h b/lib/common/debug.h
index 0e9817ea6d6..8049e4b5d89 100644
--- a/lib/common/debug.h
+++ b/lib/common/debug.h
@@ -85,18 +85,23 @@ extern int g_debuglevel; /* the variable is only declared,
                             It's useful when enabling very verbose levels
                             on selective conditions (such as position in src) */
 
-#  define RAWLOG(l, ...) {                                       \
-                if (l<=g_debuglevel) {                           \
-                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
-            }   }
-#  define DEBUGLOG(l, ...) {                                     \
-                if (l<=g_debuglevel) {                           \
-                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
-                    ZSTD_DEBUG_PRINT(" \n");                     \
-            }   }
+#  define RAWLOG(l, ...)                   \
+    do {                                   \
+        if (l<=g_debuglevel) {             \
+            ZSTD_DEBUG_PRINT(__VA_ARGS__); \
+        }                                  \
+    } while (0)
+
+#  define DEBUGLOG(l, ...)                               \
+    do {                                                 \
+        if (l<=g_debuglevel) {                           \
+            ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
+            ZSTD_DEBUG_PRINT(" \n");                     \
+        }                                                \
+    } while (0)
 #else
-#  define RAWLOG(l, ...)      {}    /* disabled */
-#  define DEBUGLOG(l, ...)    {}    /* disabled */
+#  define RAWLOG(l, ...)   do { } while (0)    /* disabled */
+#  define DEBUGLOG(l, ...) do { } while (0)    /* disabled */
 #endif
 
 
diff --git a/lib/common/error_private.h b/lib/common/error_private.h
index 325daad404b..0156010c745 100644
--- a/lib/common/error_private.h
+++ b/lib/common/error_private.h
@@ -60,8 +60,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
 ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
 
 /* check and forward error code */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
+#define CHECK_V_F(e, f)     \
+    size_t const e = f;     \
+    do {                    \
+        if (ERR_isError(e)) \
+            return e;       \
+    } while (0)
+#define CHECK_F(f)   do { CHECK_V_F(_var_err__, f); } while (0)
 
 
 /*-****************************************
@@ -95,10 +100,12 @@ void _force_has_format_string(const char *format, ...) {
  * We want to force this function invocation to be syntactically correct, but
  * we don't want to force runtime evaluation of its arguments.
  */
-#define _FORCE_HAS_FORMAT_STRING(...) \
-  if (0) { \
-    _force_has_format_string(__VA_ARGS__); \
-  }
+#define _FORCE_HAS_FORMAT_STRING(...)              \
+    do {                                           \
+        if (0) {                                   \
+            _force_has_format_string(__VA_ARGS__); \
+        }                                          \
+    } while (0)
 
 #define ERR_QUOTE(str) #str
 
@@ -109,48 +116,50 @@ void _force_has_format_string(const char *format, ...) {
  * In order to do that (particularly, printing the conditional that failed),
  * this can't just wrap RETURN_ERROR().
  */
-#define RETURN_ERROR_IF(cond, err, ...) \
-  if (cond) { \
-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
-           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  }
+#define RETURN_ERROR_IF(cond, err, ...)                                        \
+    do {                                                                       \
+        if (cond) {                                                            \
+            RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s",          \
+                  __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                             \
+            RAWLOG(3, ": " __VA_ARGS__);                                       \
+            RAWLOG(3, "\n");                                                   \
+            return ERROR(err);                                                 \
+        }                                                                      \
+    } while (0)
 
 /**
  * Unconditionally return the specified error.
  *
  * In debug modes, prints additional information.
  */
-#define RETURN_ERROR(err, ...) \
-  do { \
-    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
-           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  } while(0);
+#define RETURN_ERROR(err, ...)                                               \
+    do {                                                                     \
+        RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+              __FILE__, __LINE__, ERR_QUOTE(ERROR(err)));                    \
+        _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                               \
+        RAWLOG(3, ": " __VA_ARGS__);                                         \
+        RAWLOG(3, "\n");                                                     \
+        return ERROR(err);                                                   \
+    } while(0)
 
 /**
  * If the provided expression evaluates to an error code, returns that error code.
  *
  * In debug modes, prints additional information.
  */
-#define FORWARD_IF_ERROR(err, ...) \
-  do { \
-    size_t const err_code = (err); \
-    if (ERR_isError(err_code)) { \
-      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
-             __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
-      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-      RAWLOG(3, ": " __VA_ARGS__); \
-      RAWLOG(3, "\n"); \
-      return err_code; \
-    } \
-  } while(0);
+#define FORWARD_IF_ERROR(err, ...)                                                 \
+    do {                                                                           \
+        size_t const err_code = (err);                                             \
+        if (ERR_isError(err_code)) {                                               \
+            RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s",                 \
+                  __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
+            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                                 \
+            RAWLOG(3, ": " __VA_ARGS__);                                           \
+            RAWLOG(3, "\n");                                                       \
+            return err_code;                                                       \
+        }                                                                          \
+    } while(0)
 
 #if defined (__cplusplus)
 }
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index f7c57a028bf..ecb9cfba87c 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -178,7 +178,7 @@ static void ZSTD_copy8(void* dst, const void* src) {
     ZSTD_memcpy(dst, src, 8);
 #endif
 }
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+#define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
 
 /* Need to use memmove here since the literal buffer can now be located within
    the dst buffer. In circumstances where the op "catches up" to where the
@@ -198,7 +198,7 @@ static void ZSTD_copy16(void* dst, const void* src) {
     ZSTD_memcpy(dst, copy16_buf, 16);
 #endif
 }
-#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+#define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
 
 #define WILDCOPY_OVERLENGTH 32
 #define WILDCOPY_VECLEN 16
@@ -227,7 +227,7 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
     if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
         /* Handle short offset copies. */
         do {
-            COPY8(op, ip)
+            COPY8(op, ip);
         } while (op < oend);
     } else {
         assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index b79266fdb7c..4e441baf046 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -650,10 +650,11 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
     return 0;
 }
 
-#define BOUNDCHECK(cParam, val) { \
-    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
-                    parameter_outOfBound, "Param out of bounds"); \
-}
+#define BOUNDCHECK(cParam, val)                                       \
+    do {                                                              \
+        RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),        \
+                        parameter_outOfBound, "Param out of bounds"); \
+    } while (0)
 
 
 static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
@@ -1392,11 +1393,12 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
 static ZSTD_compressionParameters
 ZSTD_clampCParams(ZSTD_compressionParameters cParams)
 {
-#   define CLAMP_TYPE(cParam, val, type) {                                \
-        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
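-        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
-        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
-    }
+#   define CLAMP_TYPE(cParam, val, type)                                      \
+        do {                                                                  \
+            ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+            if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+            else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \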
+        } while (0)
 #   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
     CLAMP(ZSTD_c_windowLog, cParams.windowLog);
     CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index aaa6f3d3d20..a4e9c50d3bf 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -356,8 +356,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
     if (ms->prefetchCDictTables) {
         size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
         size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
-        PREFETCH_AREA(dictHashLong, hashTableBytes)
-        PREFETCH_AREA(dictHashSmall, chainTableBytes)
+        PREFETCH_AREA(dictHashLong, hashTableBytes);
+        PREFETCH_AREA(dictHashSmall, chainTableBytes);
     }
 
     /* init */
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index fb1ef60c1f8..6c4554cfca7 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -508,7 +508,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
 
     if (ms->prefetchCDictTables) {
         size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
-        PREFETCH_AREA(dictHashTable, hashTableBytes)
+        PREFETCH_AREA(dictHashTable, hashTableBytes);
     }
 
     /* init */
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 11d460c60c7..68537e60097 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1182,7 +1182,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         for (cur = 1; cur <= last_pos; cur++) {
             const BYTE* const inr = ip + cur;
             assert(cur < ZSTD_OPT_NUM);
-            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
 
             /* Fix current position with one literal if cheaper */
             {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
@@ -1331,7 +1331,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             }
 
             /* save sequences */
-            DEBUGLOG(6, "sending selected sequences into seqStore")
+            DEBUGLOG(6, "sending selected sequences into seqStore");
             {   U32 storePos;
                 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                     U32 const llen = opt[storePos].litlen;
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 6b3391a0f8d..baa0f006977 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -40,12 +40,13 @@
 #  include <unistd.h>
 #  include <sys/times.h>
 
-#  define DEBUG_PRINTHEX(l,p,n) {            \
-    unsigned debug_u;                        \
-    for (debug_u=0; debug_u<(n); debug_u++)  \
-        RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-    RAWLOG(l, " \n");                        \
-}
+#  define DEBUG_PRINTHEX(l,p,n)                                       \
+    do {                                                              \
+        unsigned debug_u;                                             \
+        for (debug_u=0; debug_u<(n); debug_u++)                       \
+            RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+        RAWLOG(l, " \n");                                             \
+    } while (0)
 
 static unsigned long long GetCurrentClockTimeMicroseconds(void)
 {
@@ -57,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 }  }
 
 #define MUTEX_WAIT_TIME_DLEVEL 6
-#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
-    if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
-        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
-        ZSTD_pthread_mutex_lock(mutex);           \
-        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
-            unsigned long long const elapsedTime = (afterTime-beforeTime); \
-            if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
-                DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
-                   elapsedTime, #mutex);          \
-        }   }                                     \
-    } else {                                      \
-        ZSTD_pthread_mutex_lock(mutex);           \
-    }                                             \
-}
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex)                                                  \
+    do {                                                                                \
+        if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {                                     \
+            unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds();    \
+            ZSTD_pthread_mutex_lock(mutex);                                             \
+            {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+                unsigned long long const elapsedTime = (afterTime-beforeTime);          \
+                if (elapsedTime > 1000) {                                               \
+                    /* or whatever threshold you like; I'm using 1 millisecond here */  \
+                    DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL,                                    \
+                        "Thread took %llu microseconds to acquire mutex %s \n",         \
+                        elapsedTime, #mutex);                                           \
+            }   }                                                                       \
+        } else {                                                                        \
+            ZSTD_pthread_mutex_lock(mutex);                                             \
+        }                                                                               \
+    } while (0)
 
 #else
 
 #  define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
-#  define DEBUG_PRINTHEX(l,p,n) {}
+#  define DEBUG_PRINTHEX(l,p,n) do { } while (0)
 
 #endif
 
@@ -667,12 +671,13 @@ typedef struct {
     unsigned frameChecksumNeeded;        /* used only by mtctx */
 } ZSTDMT_jobDescription;
 
-#define JOB_ERROR(e) {                          \
-    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   \
-    job->cSize = e;                             \
-    ZSTD_pthread_mutex_unlock(&job->job_mutex); \
-    goto _endJob;                               \
-}
+#define JOB_ERROR(e)                                \
+    do {                                            \
+        ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   \
+        job->cSize = e;                             \
+        ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+        goto _endJob;                               \
+    } while (0)
 
 /* ZSTDMT_compressionJob() is a POOL_function type */
 static void ZSTDMT_compressionJob(void* jobDescription)
@@ -1101,7 +1106,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
     {   unsigned jobNb;
         unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
         DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
-                    mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+                    mtctx->doneJobID, lastJobNb, mtctx->jobReady);
         for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
             unsigned const wJobID = jobNb & mtctx->jobIDMask;
             ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 0c43c656de8..f85dd0beea0 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -304,21 +304,21 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArg
 
 /* Calls X(N) for each stream 0, 1, 2, 3. */
 #define HUF_4X_FOR_EACH_STREAM(X) \
-    {                             \
-        X(0)                      \
-        X(1)                      \
-        X(2)                      \
-        X(3)                      \
-    }
+    do {                          \
+        X(0);                     \
+        X(1);                     \
+        X(2);                     \
+        X(3);                     \
+    } while (0)
 
 /* Calls X(N, var) for each stream 0, 1, 2, 3. */
 #define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
-    {                                           \
-        X(0, (var))                             \
-        X(1, (var))                             \
-        X(2, (var))                             \
-        X(3, (var))                             \
-    }
+    do {                                        \
+        X(0, (var));                            \
+        X(1, (var));                            \
+        X(2, (var));                            \
+        X(3, (var));                            \
+    } while (0)
 
 
 #ifndef HUF_FORCE_DECOMPRESS_X2
@@ -528,15 +528,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog
 }
 
 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
-    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+    do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
 
-#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)      \
+    do {                                            \
+        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
+    } while (0)
 
-#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
-    if (MEM_64bits()) \
-        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr)      \
+    do {                                            \
+        if (MEM_64bits())                           \
+            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
+    } while (0)
 
 HINT_INLINE size_t
 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
@@ -781,15 +785,15 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
 #endif
 
 #define HUF_4X1_DECODE_SYMBOL(_stream, _symbol)                 \
-    {                                                           \
+    do {                                                        \
         int const index = (int)(bits[(_stream)] >> 53);         \
         int const entry = (int)dtable[index];                   \
         bits[(_stream)] <<= (entry & 0x3F);                     \
         op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
-    }
+    } while (0)
 
 #define HUF_4X1_RELOAD_STREAM(_stream)                              \
-    {                                                               \
+    do {                                                            \
         int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
         int const nbBits = ctz & 7;                                 \
         int const nbBytes = ctz >> 3;                               \
@@ -797,21 +801,21 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
         ip[(_stream)] -= nbBytes;                                   \
         bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
         bits[(_stream)] <<= nbBits;                                 \
-    }
+    } while (0)
 
         /* Manually unroll the loop because compilers don't consistently
          * unroll the inner loops, which destroys performance.
          */
         do {
             /* Decode 5 symbols in each of the 4 streams */
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0)
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1)
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2)
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3)
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
 
             /* Reload each of the 4 the bitstreams */
-            HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
+            HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
         } while (op[3] < olimit);
 
 #undef HUF_4X1_DECODE_SYMBOL
@@ -1286,15 +1290,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c
 }
 
 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
-    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+    do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
 
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)                     \
+    do {                                                           \
+        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12))                \
+            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
+    } while (0)
 
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
-    if (MEM_64bits()) \
-        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)                     \
+    do {                                                           \
+        if (MEM_64bits())                                          \
+            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
+    } while (0)
 
 HINT_INLINE size_t
 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
@@ -1594,18 +1602,20 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
         }
 #endif
 
-#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3)        \
-    if ((_decode3) || (_stream) != 3) {                 \
-        int const index = (int)(bits[(_stream)] >> 53); \
-        HUF_DEltX2 const entry = dtable[index];         \
-        MEM_write16(op[(_stream)], entry.sequence);     \
-        bits[(_stream)] <<= (entry.nbBits) & 0x3F;      \
-        op[(_stream)] += (entry.length);                \
-    }
+#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3)                      \
+    do {                                                              \
+        if ((_decode3) || (_stream) != 3) {                           \
+            int const index = (int)(bits[(_stream)] >> 53);           \
+            HUF_DEltX2 const entry = dtable[index];                   \
+            MEM_write16(op[(_stream)], entry.sequence); \
+            bits[(_stream)] <<= (entry.nbBits) & 0x3F;                \
+            op[(_stream)] += (entry.length);                          \
+        }                                                             \
+    } while (0)
 
 #define HUF_4X2_RELOAD_STREAM(_stream)                                  \
-    {                                                                   \
-        HUF_4X2_DECODE_SYMBOL(3, 1)                                     \
+    do {                                                                \
+        HUF_4X2_DECODE_SYMBOL(3, 1);                                    \
         {                                                               \
             int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
             int const nbBits = ctz & 7;                                 \
@@ -1614,7 +1624,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
             bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
             bits[(_stream)] <<= nbBits;                                 \
         }                                                               \
-    }
+    } while (0)
 
         /* Manually unroll the loop because compilers don't consistently
          * unroll the inner loops, which destroys performance.
@@ -1624,20 +1634,20 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
              * The final stream will be decoded during the reload phase
              * to reduce register pressure.
              */
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
-            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
 
             /* Decode one symbol from the final stream */
-            HUF_4X2_DECODE_SYMBOL(3, 1)
+            HUF_4X2_DECODE_SYMBOL(3, 1);
 
             /* Decode 4 symbols from the final stream & reload bitstreams.
              * The final stream is reloaded last, meaning that all 5 symbols
              * are decoded from the final stream before it is reloaded.
              */
-            HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
+            HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
         } while (op[3] < olimit);
     }
 
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 027a0f8cc74..f57b31e70f6 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -2173,7 +2173,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     /* shortcut : using single-pass mode */
                     size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
                     if (ZSTD_isError(decompressedSize)) return decompressedSize;
-                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
+                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
                     assert(istart != NULL);
                     ip = istart + cSize;
                     op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 58290f450d6..82e999e80e3 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -74,9 +74,9 @@ static const U32 g_selectivity_default = 9;
 *  Console display
 ***************************************/
 #undef  DISPLAY
-#define DISPLAY(...)         { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
+#define DISPLAY(...)         do { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } while (0)
 #undef  DISPLAYLEVEL
-#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); }    /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#define DISPLAYLEVEL(l, ...) do { if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } } while (0)    /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
 
 static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
 
@@ -477,10 +477,16 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
     clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
 
 #   undef  DISPLAYUPDATE
-#   define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
-            if (ZDICT_clockSpan(displayClock) > refreshRate)  \
-            { displayClock = clock(); DISPLAY(__VA_ARGS__); \
-            if (notificationLevel>=4) fflush(stderr); } }
+#   define DISPLAYUPDATE(l, ...)                                   \
+        do {                                                       \
+            if (notificationLevel>=l) {                            \
+                if (ZDICT_clockSpan(displayClock) > refreshRate) { \
+                    displayClock = clock();                        \
+                    DISPLAY(__VA_ARGS__);                          \
+                }                                                  \
+                if (notificationLevel>=4) fflush(stderr);          \
+            }                                                      \
+        } while (0)
 
     /* init */
     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */

From dc84e35138338e95016fe23feb7dae43a842ca4f Mon Sep 17 00:00:00 2001
From: elasota <1137273+elasota@users.noreply.github.com>
Date: Wed, 8 Nov 2023 23:25:00 -0500
Subject: [PATCH 127/283] Clarify that the presence of a value with weight 1 is
 required

---
 doc/zstd_compression_format.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index b64ddc3bff2..dbf409eac88 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -1252,7 +1252,7 @@ Number_of_Bits = Weight ? (Max_Number_of_Bits + 1 - Weight) : 0
 ```
 When a literal value is not present, it receives a `Weight` of 0.
 The least frequent symbol receives a `Weight` of 1.
-Consequently, the `Weight` 1 is necessarily present.
+If no symbol has a `Weight` of 1, then the data is considered corrupted.
 The most frequent symbol receives a `Weight` anywhere between 1 and 11 (max).
 The last symbol's `Weight` is deduced from previously retrieved Weights,
 by completing to the nearest power of 2. It's necessarily non 0.

From 05059e5a48333e594e0204894cbbdffe51305487 Mon Sep 17 00:00:00 2001
From: elasota <1137273+elasota@users.noreply.github.com>
Date: Wed, 8 Nov 2023 23:46:37 -0500
Subject: [PATCH 128/283] Clarify that there must be at least 2 weights, i.e.
 encoding all weights as 0 is invalid

---
 doc/zstd_compression_format.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index dbf409eac88..4a8d338b7a6 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -1252,7 +1252,9 @@ Number_of_Bits = Weight ? (Max_Number_of_Bits + 1 - Weight) : 0
 ```
 When a literal value is not present, it receives a `Weight` of 0.
 The least frequent symbol receives a `Weight` of 1.
-If no symbol has a `Weight` of 1, then the data is considered corrupted.
+If no literal has a `Weight` of 1, then the data is considered corrupted.
+If there are not at least two literals with non-zero `Weight`, then the data
+is considered corrupted.
 The most frequent symbol receives a `Weight` anywhere between 1 and 11 (max).
 The last symbol's `Weight` is deduced from previously retrieved Weights,
 by completing to the nearest power of 2. It's necessarily non 0.

From 809c7eb6bff1934745b425437d2116d9c0dbe0df Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky 
Date: Mon, 27 Nov 2023 06:52:36 -0800
Subject: [PATCH 129/283] Refactor ZSTD_sequenceProducer_F typedef to
 ZSTD_sequenceProducer_F*

---
 lib/compress/zstd_compress.c          | 2 +-
 lib/compress/zstd_compress_internal.h | 2 +-
 lib/zstd.h                            | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 4e441baf046..1a414a5dc83 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -7086,7 +7086,7 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
 
 void ZSTD_registerSequenceProducer(
     ZSTD_CCtx* zc, void* mState,
-    ZSTD_sequenceProducer_F* mFinder
+    ZSTD_sequenceProducer_F mFinder
 ) {
     if (mFinder != NULL) {
         ZSTD_externalMatchCtx emctx;
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index b5e1a08ca32..819bd3956da 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -404,7 +404,7 @@ typedef struct {
 /* Context for block-level external matchfinder API */
 typedef struct {
   void* mState;
-  ZSTD_sequenceProducer_F* mFinder;
+  ZSTD_sequenceProducer_F mFinder;
   ZSTD_Sequence* seqBuffer;
   size_t seqBufferCapacity;
 } ZSTD_externalMatchCtx;
diff --git a/lib/zstd.h b/lib/zstd.h
index c33dab3cd9f..61f81db0f25 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -2789,7 +2789,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
 
 #define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
 
-typedef size_t ZSTD_sequenceProducer_F (
+typedef size_t (*ZSTD_sequenceProducer_F) (
   void* sequenceProducerState,
   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
   const void* src, size_t srcSize,
@@ -2821,7 +2821,7 @@ ZSTDLIB_STATIC_API void
 ZSTD_registerSequenceProducer(
   ZSTD_CCtx* cctx,
   void* sequenceProducerState,
-  ZSTD_sequenceProducer_F* sequenceProducer
+  ZSTD_sequenceProducer_F sequenceProducer
 );
 
 

From d151a4880bdcb15d10ed11136b8b7d8d3d66af2c Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky 
Date: Mon, 27 Nov 2023 06:52:52 -0800
Subject: [PATCH 130/283] Move offload API params into ZSTD_CCtx_params

---
 lib/compress/zstd_compress.c          | 66 ++++++++++++---------------
 lib/compress/zstd_compress_internal.h | 26 +++++------
 2 files changed, 42 insertions(+), 50 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 1a414a5dc83..cdd763ff6cf 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1365,7 +1365,6 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
         RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                         "Reset parameters is only possible during init stage.");
         ZSTD_clearAllDicts(cctx);
-        ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
         return ZSTD_CCtxParams_reset(&cctx->requestedParams);
     }
     return 0;
@@ -1752,7 +1751,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
      * be needed. However, we still allocate two 0-sized buffers, which can
      * take space under ASAN. */
     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
 }
 
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
@@ -1813,7 +1812,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
 
         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
             &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
-            ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
+            ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
     }
 }
 
@@ -2119,7 +2118,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
 
     {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
         size_t const blockSize = MIN(params->maxBlockSize, windowSize);
-        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
+        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params));
         size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
@@ -2136,7 +2135,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         size_t const neededSpace =
             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
                 &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
-                buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
+                buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
         int resizeWorkspace;
 
         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
@@ -2221,10 +2220,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         }
 
         /* reserve space for block-level external sequences */
-        if (params->useSequenceProducer) {
+        if (ZSTD_hasExtSeqProd(params)) {
             size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
-            zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
-            zc->externalMatchCtx.seqBuffer =
+            zc->extSeqBufCapacity = maxNbExternalSeq;
+            zc->extSeqBuf =
                 (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
         }
 
@@ -3248,7 +3247,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
             /* External matchfinder + LDM is technically possible, just not implemented yet.
              * We need to revisit soon and implement it. */
             RETURN_ERROR_IF(
-                zc->appliedParams.useSequenceProducer,
+                ZSTD_hasExtSeqProd(&zc->appliedParams),
                 parameter_combination_unsupported,
                 "Long-distance matching with external sequence producer enabled is not currently supported."
             );
@@ -3267,7 +3266,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
             /* External matchfinder + LDM is technically possible, just not implemented yet.
              * We need to revisit soon and implement it. */
             RETURN_ERROR_IF(
-                zc->appliedParams.useSequenceProducer,
+                ZSTD_hasExtSeqProd(&zc->appliedParams),
                 parameter_combination_unsupported,
                 "Long-distance matching with external sequence producer enabled is not currently supported."
             );
@@ -3286,18 +3285,18 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                                        zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(ldmSeqStore.pos == ldmSeqStore.size);
-        } else if (zc->appliedParams.useSequenceProducer) {
+        } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) {
             assert(
-                zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
+                zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize)
             );
-            assert(zc->externalMatchCtx.mFinder != NULL);
+            assert(zc->appliedParams.extSeqProdFunc != NULL);
 
             {   U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
 
-                size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
-                    zc->externalMatchCtx.mState,
-                    zc->externalMatchCtx.seqBuffer,
-                    zc->externalMatchCtx.seqBufferCapacity,
+                size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)(
+                    zc->appliedParams.extSeqProdState,
+                    zc->extSeqBuf,
+                    zc->extSeqBufCapacity,
                     src, srcSize,
                     NULL, 0,  /* dict and dictSize, currently not supported */
                     zc->appliedParams.compressionLevel,
@@ -3305,21 +3304,21 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                 );
 
                 size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
-                    zc->externalMatchCtx.seqBuffer,
+                    zc->extSeqBuf,
                     nbExternalSeqs,
-                    zc->externalMatchCtx.seqBufferCapacity,
+                    zc->extSeqBufCapacity,
                     srcSize
                 );
 
                 /* Return early if there is no error, since we don't need to worry about last literals */
                 if (!ZSTD_isError(nbPostProcessedSeqs)) {
                     ZSTD_sequencePosition seqPos = {0,0,0};
-                    size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
+                    size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs);
                     RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
                     FORWARD_IF_ERROR(
                         ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
                             zc, &seqPos,
-                            zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
+                            zc->extSeqBuf, nbPostProcessedSeqs,
                             src, srcSize,
                             zc->appliedParams.searchForExternalRepcodes
                         ),
@@ -6209,7 +6208,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
 #ifdef ZSTD_MULTITHREAD
     /* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
     RETURN_ERROR_IF(
-        params.useSequenceProducer == 1 && params.nbWorkers >= 1,
+        ZSTD_hasExtSeqProd(&params) && params.nbWorkers >= 1,
         parameter_combination_unsupported,
         "External sequence producer isn't supported with nbWorkers >= 1"
     );
@@ -6501,7 +6500,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
-                                                cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
+                                                cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
                                                 "Sequence validation failed");
         }
         RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
@@ -6639,7 +6638,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
-                                                   cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
+                                                   cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
                                                    "Sequence validation failed");
         }
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
@@ -7085,19 +7084,14 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
 }
 
 void ZSTD_registerSequenceProducer(
-    ZSTD_CCtx* zc, void* mState,
-    ZSTD_sequenceProducer_F mFinder
+    ZSTD_CCtx* zc, void* extSeqProdState,
+    ZSTD_sequenceProducer_F extSeqProdFunc
 ) {
-    if (mFinder != NULL) {
-        ZSTD_externalMatchCtx emctx;
-        emctx.mState = mState;
-        emctx.mFinder = mFinder;
-        emctx.seqBuffer = NULL;
-        emctx.seqBufferCapacity = 0;
-        zc->externalMatchCtx = emctx;
-        zc->requestedParams.useSequenceProducer = 1;
+    if (extSeqProdFunc != NULL) {
+        zc->requestedParams.extSeqProdFunc = extSeqProdFunc;
+        zc->requestedParams.extSeqProdState = extSeqProdState;
     } else {
-        ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
-        zc->requestedParams.useSequenceProducer = 0;
+        zc->requestedParams.extSeqProdFunc = NULL;
+        zc->requestedParams.extSeqProdState = NULL;
     }
 }
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 819bd3956da..60f22239e1a 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -360,10 +360,11 @@ struct ZSTD_CCtx_params_s {
      * if the external matchfinder returns an error code. */
     int enableMatchFinderFallback;
 
-    /* Indicates whether an external matchfinder has been referenced.
-     * Users can't set this externally.
-     * It is set internally in ZSTD_registerSequenceProducer(). */
-    int useSequenceProducer;
+    /* Parameters for the external sequence producer API.
+     * Users set these parameters through ZSTD_registerSequenceProducer().
+     * It is not possible to set these parameters individually through the public API. */
+    void* extSeqProdState;
+    ZSTD_sequenceProducer_F extSeqProdFunc;
 
     /* Adjust the max block size*/
     size_t maxBlockSize;
@@ -401,14 +402,6 @@ typedef struct {
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 } ZSTD_blockSplitCtx;
 
-/* Context for block-level external matchfinder API */
-typedef struct {
-  void* mState;
-  ZSTD_sequenceProducer_F mFinder;
-  ZSTD_Sequence* seqBuffer;
-  size_t seqBufferCapacity;
-} ZSTD_externalMatchCtx;
-
 struct ZSTD_CCtx_s {
     ZSTD_compressionStage_e stage;
     int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@@ -479,8 +472,9 @@ struct ZSTD_CCtx_s {
     /* Workspace for block splitter */
     ZSTD_blockSplitCtx blockSplitCtx;
 
-    /* Workspace for external matchfinder */
-    ZSTD_externalMatchCtx externalMatchCtx;
+    /* Buffer for output from external sequence producer */
+    ZSTD_Sequence* extSeqBuf;
+    size_t extSeqBufCapacity;
 };
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@@ -1512,6 +1506,10 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
                                    const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                    const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
 
+/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
+MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
+    return params->extSeqProdFunc != NULL;
+}
 
 /* ===============================================================
  * Deprecated definitions that are still used internally to avoid

From 468bb173782115e7bd2704f3a9e82341912eebd4 Mon Sep 17 00:00:00 2001
From: aimuz 
Date: Tue, 28 Nov 2023 21:04:37 +0800
Subject: [PATCH 131/283] lib/decompress: check for reserved bit corruption in
 zstd

The patch adds a validation to ensure that the last field, which is
reserved, must be all-zeroes in ZSTD_decodeSeqHeaders. This prevents
potential corruption from going undetected.

Fixes an issue where corrupted input could lead to undefined behavior
due to improper validation of reserved bits.

Signed-off-by: aimuz 
---
 lib/decompress/zstd_decompress_block.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index 19cbdc5c16e..80c29db69db 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -607,6 +607,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
 
     /* FSE table descriptors */
     RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+    RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);

From 20f8df64404b125fb76b851ccea944f47b820fd8 Mon Sep 17 00:00:00 2001
From: Ed Maste 
Date: Wed, 13 Dec 2023 10:53:13 -0500
Subject: [PATCH 132/283] Update FreeBSD CI: drop 12.4 as it is nearly EOL

12.4 is EOL as of the end of December 2023, and pkg installation will
start failing some time after that so remove those jobs now.
---
 .cirrus.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.cirrus.yml b/.cirrus.yml
index 047e77f69fa..2e5e18c01f2 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -3,7 +3,6 @@ task:
   freebsd_instance:
     matrix:
       image_family: freebsd-13-2
-      image_family: freebsd-12-4
   install_script: pkg install -y gmake coreutils
   script: |
     MOREFLAGS="-Werror" gmake -j all

From 2ce0290e4d745846f03956be238596929de88768 Mon Sep 17 00:00:00 2001
From: Ed Maste 
Date: Wed, 13 Dec 2023 19:54:29 -0500
Subject: [PATCH 133/283] zlibWrapper: convert to C89 / ANSI C

Clang 16 (which is the system compiler in FreeBSD 14.0) no longer allows
K&R function definitions.  Formatting of the changes matches current
zlib.
---
 zlibWrapper/examples/example.c          | 67 +++++-------------
 zlibWrapper/examples/example_original.c | 49 ++++---------
 zlibWrapper/examples/minigzip.c         | 63 ++++-------------
 zlibWrapper/gzclose.c                   |  4 +-
 zlibWrapper/gzlib.c                     | 93 ++++++-------------------
 zlibWrapper/gzread.c                    | 75 +++++---------------
 zlibWrapper/gzwrite.c                   | 76 +++++---------------
 7 files changed, 103 insertions(+), 324 deletions(-)

diff --git a/zlibWrapper/examples/example.c b/zlibWrapper/examples/example.c
index d7590e31237..99fbf5b1953 100644
--- a/zlibWrapper/examples/example.c
+++ b/zlibWrapper/examples/example.c
@@ -77,9 +77,7 @@ int  main               _Z_OF((int argc, char *argv[]));
 void *myalloc _Z_OF((void *, unsigned, unsigned));
 void myfree _Z_OF((void *, void *));
 
-void *myalloc(q, n, m)
-    void *q;
-    unsigned n, m;
+void *myalloc(void *q, unsigned n, unsigned m)
 {
     void *buf = calloc(n, m);
     q = Z_NULL;
@@ -110,10 +108,8 @@ void test_gzio          _Z_OF((const char *fname,
 /* ===========================================================================
  * Test compress() and uncompress()
  */
-void test_compress(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
-{
+void test_compress(Byte *compr, uLong comprLen, Byte *uncompr,
+                   uLong uncomprLen) {
     int err;
     uLong len = (uLong)strlen(hello)+1;
 
@@ -136,11 +132,7 @@ void test_compress(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test read/write of .gz files
  */
-void test_gzio(fname, uncompr, uncomprLen)
-    const char *fname; /* compressed file name */
-    Byte *uncompr;
-    uLong uncomprLen;
-{
+void test_gzio(const char *fname, Byte *uncompr, uLong uncomprLen) {
 #ifdef NO_GZCOMPRESS
     fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n");
 #else
@@ -222,10 +214,7 @@ void test_gzio(fname, uncompr, uncomprLen)
 /* ===========================================================================
  * Test deflate() with small buffers
  */
-void test_deflate(compr, comprLen)
-    Byte *compr;
-    uLong comprLen;
-{
+void test_deflate(Byte *compr, uLong comprLen) {
     z_stream c_stream; /* compression stream */
     int err;
     uLong len = (uLong)strlen(hello)+1;
@@ -260,10 +249,8 @@ void test_deflate(compr, comprLen)
 /* ===========================================================================
  * Test inflate() with small buffers
  */
-void test_inflate(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
-{
+void test_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+                  uLong uncomprLen) {
     int err;
     z_stream d_stream; /* decompression stream */
 
@@ -301,10 +288,8 @@ void test_inflate(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test deflate() with large buffers and dynamic change of compression level
  */
-void test_large_deflate(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
-{
+void test_large_deflate(Byte *compr, uLong comprLen, Byte *uncompr,
+                        uLong uncomprLen) {
     z_stream c_stream; /* compression stream */
     int err;
 
@@ -355,11 +340,9 @@ void test_large_deflate(compr, comprLen, uncompr, uncomprLen)
 
 /* ===========================================================================
  * Test inflate() with large buffers
- */
-void test_large_inflate(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
-{
+ */ 
+void test_large_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+                        uLong uncomprLen) {
     int err;
     z_stream d_stream; /* decompression stream */
 
@@ -397,10 +380,7 @@ void test_large_inflate(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test deflate() with full flush
  */
-void test_flush(compr, comprLen)
-    Byte *compr;
-    uLong *comprLen;
-{
+void test_flush(Byte *compr, uLong *comprLen) {
     z_stream c_stream; /* compression stream */
     int err;
     uInt len = (uInt)strlen(hello)+1;
@@ -435,10 +415,7 @@ void test_flush(compr, comprLen)
 /* ===========================================================================
  * Test inflateSync()
  */
-void test_sync(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
-{
+void test_sync(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen) {
     int err;
     z_stream d_stream; /* decompression stream */
 
@@ -479,10 +456,7 @@ void test_sync(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test deflate() with preset dictionary
  */
-void test_dict_deflate(compr, comprLen)
-    Byte *compr;
-    uLong comprLen;
-{
+void test_dict_deflate(Byte *compr, uLong comprLen) {
     z_stream c_stream; /* compression stream */
     int err;
 
@@ -516,10 +490,8 @@ void test_dict_deflate(compr, comprLen)
 /* ===========================================================================
  * Test inflate() with a preset dictionary
  */
-void test_dict_inflate(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
-{
+void test_dict_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+                       uLong uncomprLen) {
     int err;
     z_stream d_stream; /* decompression stream */
 
@@ -567,10 +539,7 @@ void test_dict_inflate(compr, comprLen, uncompr, uncomprLen)
  * Usage:  example [output.gz  [input.gz]]
  */
 
-int main(argc, argv)
-    int argc;
-    char *argv[];
-{
+int main(int argc, char *argv[]) {
     Byte *compr, *uncompr;
     uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */
     uLong uncomprLen = comprLen;
diff --git a/zlibWrapper/examples/example_original.c b/zlibWrapper/examples/example_original.c
index 5b4e4d1d8f6..828b06c881c 100644
--- a/zlibWrapper/examples/example_original.c
+++ b/zlibWrapper/examples/example_original.c
@@ -102,9 +102,7 @@ void test_gzio          _Z_OF((const char *fname,
 /* ===========================================================================
  * Test compress() and uncompress()
  */
-void test_compress(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
+void test_compress(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen)
 {
     int err;
     uLong len = (uLong)strlen(hello)+1;
@@ -128,10 +126,8 @@ void test_compress(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test read/write of .gz files
  */
-void test_gzio(fname, uncompr, uncomprLen)
-    const char *fname; /* compressed file name */
-    Byte *uncompr;
-    uLong uncomprLen;
+void test_gzio(const char *fname /* compressed file name */, Byte *uncompr,
+    uLong uncomprLen)
 {
 #ifdef NO_GZCOMPRESS
     fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n");
@@ -214,9 +210,7 @@ void test_gzio(fname, uncompr, uncomprLen)
 /* ===========================================================================
  * Test deflate() with small buffers
  */
-void test_deflate(compr, comprLen)
-    Byte *compr;
-    uLong comprLen;
+void test_deflate(Byte *compr, uLong comprLen)
 {
     z_stream c_stream; /* compression stream */
     int err;
@@ -252,9 +246,7 @@ void test_deflate(compr, comprLen)
 /* ===========================================================================
  * Test inflate() with small buffers
  */
-void test_inflate(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
+void test_inflate(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen)
 {
     int err;
     z_stream d_stream; /* decompression stream */
@@ -293,9 +285,8 @@ void test_inflate(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test deflate() with large buffers and dynamic change of compression level
  */
-void test_large_deflate(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
+void test_large_deflate(Byte *compr, uLong comprLen, Byte *uncompr,
+    uLong uncomprLen)
 {
     z_stream c_stream; /* compression stream */
     int err;
@@ -348,9 +339,8 @@ void test_large_deflate(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test inflate() with large buffers
  */
-void test_large_inflate(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
+void test_large_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+    uLong uncomprLen)
 {
     int err;
     z_stream d_stream; /* decompression stream */
@@ -389,9 +379,7 @@ void test_large_inflate(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test deflate() with full flush
  */
-void test_flush(compr, comprLen)
-    Byte *compr;
-    uLong *comprLen;
+void test_flush(Byte *compr, uLong *comprLen)
 {
     z_stream c_stream; /* compression stream */
     int err;
@@ -427,9 +415,7 @@ void test_flush(compr, comprLen)
 /* ===========================================================================
  * Test inflateSync()
  */
-void test_sync(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
+void test_sync(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen)
 {
     int err;
     z_stream d_stream; /* decompression stream */
@@ -471,9 +457,7 @@ void test_sync(compr, comprLen, uncompr, uncomprLen)
 /* ===========================================================================
  * Test deflate() with preset dictionary
  */
-void test_dict_deflate(compr, comprLen)
-    Byte *compr;
-    uLong comprLen;
+void test_dict_deflate(Byte *compr, uLong comprLen)
 {
     z_stream c_stream; /* compression stream */
     int err;
@@ -508,9 +492,8 @@ void test_dict_deflate(compr, comprLen)
 /* ===========================================================================
  * Test inflate() with a preset dictionary
  */
-void test_dict_inflate(compr, comprLen, uncompr, uncomprLen)
-    Byte *compr, *uncompr;
-    uLong comprLen, uncomprLen;
+void test_dict_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+    uLong uncomprLen)
 {
     int err;
     z_stream d_stream; /* decompression stream */
@@ -559,9 +542,7 @@ void test_dict_inflate(compr, comprLen, uncompr, uncomprLen)
  * Usage:  example [output.gz  [input.gz]]
  */
 
-int main(argc, argv)
-    int argc;
-    char *argv[];
+int main(int argc, char *argv[])
 {
     Byte *compr, *uncompr;
     uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */
diff --git a/zlibWrapper/examples/minigzip.c b/zlibWrapper/examples/minigzip.c
index 67a17907b47..1af81520739 100644
--- a/zlibWrapper/examples/minigzip.c
+++ b/zlibWrapper/examples/minigzip.c
@@ -82,8 +82,7 @@
    The strwinerror function does not change the current setting
    of GetLastError.  */
 
-static char *strwinerror (error)
-     DWORD error;
+static char *strwinerror(DWORD error)
 {
     static char buf[1024];
 
@@ -121,8 +120,7 @@ static char *strwinerror (error)
     return buf;
 }
 
-static void pwinerror (s)
-    const char *s;
+static void pwinerror (const char *s)
 {
     if (s && *s)
         fprintf(stderr, "%s: %s\n", s, strwinerror(GetLastError ()));
@@ -198,11 +196,7 @@ const char *mode;
     return gz_open(NULL, fd, mode);
 }
 
-gzFile gz_open(path, fd, mode)
-    const char *path;
-    int fd;
-    const char *mode;
-{
+gzFile gz_open(const char *path, int fd, const char *mode) {
     gzFile gz;
     int ret;
 
@@ -238,11 +232,7 @@ gzFile gz_open(path, fd, mode)
 
 int gzwrite _Z_OF((gzFile, const void *, unsigned));
 
-int gzwrite(gz, buf, len)
-    gzFile gz;
-    const void *buf;
-    unsigned len;
-{
+int gzwrite(gzFile gz, const void *buf, unsigned len) {
     z_stream *strm;
     unsigned char out[BUFLEN];
 
@@ -262,11 +252,7 @@ int gzwrite(gz, buf, len)
 
 int gzread _Z_OF((gzFile, void *, unsigned));
 
-int gzread(gz, buf, len)
-    gzFile gz;
-    void *buf;
-    unsigned len;
-{
+int gzread(gzFile gz, void *buf, unsigned len) {
     int ret;
     unsigned got;
     unsigned char in[1];
@@ -299,9 +285,7 @@ int gzread(gz, buf, len)
 
 int gzclose _Z_OF((gzFile));
 
-int gzclose(gz)
-    gzFile gz;
-{
+int gzclose(gzFile gz) {
     z_stream *strm;
     unsigned char out[BUFLEN];
 
@@ -328,9 +312,7 @@ int gzclose(gz)
 
 const char *gzerror _Z_OF((gzFile, int *));
 
-const char *gzerror(gz, err)
-    gzFile gz;
-    int *err;
+const char *gzerror(gzFile gz, int *err)
 {
     *err = gz->err;
     return gz->msg;
@@ -353,8 +335,7 @@ int  main             _Z_OF((int argc, char *argv[]));
 /* ===========================================================================
  * Display error message and exit
  */
-void error(msg)
-    const char *msg;
+void error(const char *msg)
 {
     fprintf(stderr, "%s: %s\n", prog, msg);
     exit(1);
@@ -364,9 +345,7 @@ void error(msg)
  * Compress input to output then close both files.
  */
 
-void gz_compress(in, out)
-    FILE   *in;
-    gzFile out;
+void gz_compress(FILE *in, gzFile out)
 {
     local char buf[BUFLEN];
     int len;
@@ -397,10 +376,7 @@ void gz_compress(in, out)
 /* Try compressing the input file at once using mmap. Return Z_OK if
  * if success, Z_ERRNO otherwise.
  */
-int gz_compress_mmap(in, out)
-    FILE   *in;
-    gzFile out;
-{
+int gz_compress_mmap(FILE *in, gzFile out) {
     int len;
     int err;
     int ifd = fileno(in);
@@ -432,10 +408,7 @@ int gz_compress_mmap(in, out)
 /* ===========================================================================
  * Uncompress input to output then close both files.
  */
-void gz_uncompress(in, out)
-    gzFile in;
-    FILE   *out;
-{
+void gz_uncompress(gzFile in, FILE *out) {
     local char buf[BUFLEN];
     int len;
     int err;
@@ -459,10 +432,7 @@ void gz_uncompress(in, out)
  * Compress the given file: create a corresponding .gz file and remove the
  * original.
  */
-void file_compress(file, mode)
-    char  *file;
-    char  *mode;
-{
+void file_compress(char *file, char *mode) {
     local char outfile[MAX_NAME_LEN];
     FILE  *in;
     gzFile out;
@@ -494,9 +464,7 @@ void file_compress(file, mode)
 /* ===========================================================================
  * Uncompress the given file and remove the original.
  */
-void file_uncompress(file)
-    char  *file;
-{
+void file_uncompress(char *file) {
     local char buf[MAX_NAME_LEN];
     char *infile, *outfile;
     FILE  *out;
@@ -546,10 +514,7 @@ void file_uncompress(file)
  *   -1 to -9 : compression level
  */
 
-int main(argc, argv)
-    int argc;
-    char *argv[];
-{
+int main(int argc, char *argv[]) {
     int copyout = 0;
     int uncompr = 0;
     gzFile file;
diff --git a/zlibWrapper/gzclose.c b/zlibWrapper/gzclose.c
index ba43b8c5e44..12a2dfc5895 100644
--- a/zlibWrapper/gzclose.c
+++ b/zlibWrapper/gzclose.c
@@ -11,9 +11,7 @@
 /* gzclose() is in a separate file so that it is linked in only if it is used.
    That way the other gzclose functions can be used instead to avoid linking in
    unneeded compression or decompression routines. */
-int ZEXPORT gzclose(file)
-    gzFile file;
-{
+int ZEXPORT gzclose(gzFile file) {
 #ifndef NO_GZCOMPRESS
     gz_statep state;
 
diff --git a/zlibWrapper/gzlib.c b/zlibWrapper/gzlib.c
index eea480a74c9..c726515113c 100644
--- a/zlibWrapper/gzlib.c
+++ b/zlibWrapper/gzlib.c
@@ -33,9 +33,7 @@ local gzFile gz_open _Z_OF((const void *, int, const char *));
 
    The gz_strwinerror function does not change the current setting of
    GetLastError. */
-char ZLIB_INTERNAL *gz_strwinerror (error)
-     DWORD error;
-{
+char ZLIB_INTERNAL *gz_strwinerror(DWORD error) {
     static char buf[1024];
 
     wchar_t *msgbuf;
@@ -75,9 +73,7 @@ char ZLIB_INTERNAL *gz_strwinerror (error)
 #endif /* UNDER_CE */
 
 /* Reset gzip file state */
-local void gz_reset(state)
-    gz_statep state;
-{
+local void gz_reset(gz_statep state) {
     state.state->x.have = 0;              /* no output data available */
     if (state.state->mode == GZ_READ) {   /* for reading ... */
         state.state->eof = 0;             /* not at end of file */
@@ -91,11 +87,7 @@ local void gz_reset(state)
 }
 
 /* Open a gzip file either by name or file descriptor. */
-local gzFile gz_open(path, fd, mode)
-    const void *path;
-    int fd;
-    const char *mode;
-{
+local gzFile gz_open(const void *path, int fd, const char *mode) {
     gz_statep state;
     z_size_t len;
     int oflag;
@@ -270,26 +262,17 @@ local gzFile gz_open(path, fd, mode)
 }
 
 /* -- see zlib.h -- */
-gzFile ZEXPORT gzopen(path, mode)
-    const char *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen(const char *path, const char *mode) {
     return gz_open(path, -1, mode);
 }
 
 /* -- see zlib.h -- */
-gzFile ZEXPORT gzopen64(path, mode)
-    const char *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen64(const char *path, const char *mode) {
     return gz_open(path, -1, mode);
 }
 
 /* -- see zlib.h -- */
-gzFile ZEXPORT gzdopen(fd, mode)
-    int fd;
-    const char *mode;
-{
+gzFile ZEXPORT gzdopen(int fd, const char *mode) {
     char *path;         /* identifier for error messages */
     gzFile gz;
 
@@ -307,19 +290,13 @@ gzFile ZEXPORT gzdopen(fd, mode)
 
 /* -- see zlib.h -- */
 #ifdef WIDECHAR
-gzFile ZEXPORT gzopen_w(path, mode)
-    const wchar_t *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen_w(const wchar_t *path, const char *mode) {
     return gz_open(path, -2, mode);
 }
 #endif
 
 /* -- see zlib.h -- */
-int ZEXPORT gzbuffer(file, size)
-    gzFile file;
-    unsigned size;
-{
+int ZEXPORT gzbuffer(gzFile file, unsigned size) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -343,9 +320,7 @@ int ZEXPORT gzbuffer(file, size)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzrewind(file)
-    gzFile file;
-{
+int ZEXPORT gzrewind(gzFile file) {
     gz_statep state;
 
     /* get internal structure */
@@ -366,11 +341,7 @@ int ZEXPORT gzrewind(file)
 }
 
 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gzseek64(file, offset, whence)
-    gzFile file;
-    z_off64_t offset;
-    int whence;
-{
+z_off64_t ZEXPORT gzseek64(gzFile file, z_off64_t offset, int whence) {
     unsigned n;
     z_off64_t ret;
     gz_statep state;
@@ -443,11 +414,7 @@ z_off64_t ZEXPORT gzseek64(file, offset, whence)
 }
 
 /* -- see zlib.h -- */
-z_off_t ZEXPORT gzseek(file, offset, whence)
-    gzFile file;
-    z_off_t offset;
-    int whence;
-{
+z_off_t ZEXPORT gzseek(gzFile file, z_off_t offset, int whence) {
     z_off64_t ret;
 
     ret = gzseek64(file, (z_off64_t)offset, whence);
@@ -455,9 +422,7 @@ z_off_t ZEXPORT gzseek(file, offset, whence)
 }
 
 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gztell64(file)
-    gzFile file;
-{
+z_off64_t ZEXPORT gztell64(gzFile file) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -472,9 +437,7 @@ z_off64_t ZEXPORT gztell64(file)
 }
 
 /* -- see zlib.h -- */
-z_off_t ZEXPORT gztell(file)
-    gzFile file;
-{
+z_off_t ZEXPORT gztell(gzFile file) {
     z_off64_t ret;
 
     ret = gztell64(file);
@@ -482,9 +445,7 @@ z_off_t ZEXPORT gztell(file)
 }
 
 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gzoffset64(file)
-    gzFile file;
-{
+z_off64_t ZEXPORT gzoffset64(gzFile file) {
     z_off64_t offset;
     gz_statep state;
 
@@ -505,9 +466,7 @@ z_off64_t ZEXPORT gzoffset64(file)
 }
 
 /* -- see zlib.h -- */
-z_off_t ZEXPORT gzoffset(file)
-    gzFile file;
-{
+z_off_t ZEXPORT gzoffset(gzFile file) {
     z_off64_t ret;
 
     ret = gzoffset64(file);
@@ -515,9 +474,7 @@ z_off_t ZEXPORT gzoffset(file)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzeof(file)
-    gzFile file;
-{
+int ZEXPORT gzeof(gzFile file) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -532,10 +489,7 @@ int ZEXPORT gzeof(file)
 }
 
 /* -- see zlib.h -- */
-const char * ZEXPORT gzerror(file, errnum)
-    gzFile file;
-    int *errnum;
-{
+const char * ZEXPORT gzerror(gzFile file, int *errnum) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -553,9 +507,7 @@ const char * ZEXPORT gzerror(file, errnum)
 }
 
 /* -- see zlib.h -- */
-void ZEXPORT gzclearerr(file)
-    gzFile file;
-{
+void ZEXPORT gzclearerr(gzFile file) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -579,11 +531,7 @@ void ZEXPORT gzclearerr(file)
    memory).  Simply save the error message as a static string.  If there is an
    allocation failure constructing the error message, then convert the error to
    out of memory. */
-void ZLIB_INTERNAL gz_error(state, err, msg)
-    gz_statep state;
-    int err;
-    const char *msg;
-{
+void ZLIB_INTERNAL gz_error(gz_statep state, int err, const char *msg) {
     /* free previously allocated message and clear */
     if (state.state->msg != NULL) {
         if (state.state->err != Z_MEM_ERROR)
@@ -625,8 +573,7 @@ void ZLIB_INTERNAL gz_error(state, err, msg)
    available) -- we need to do this to cover cases where 2's complement not
    used, since C standard permits 1's complement and sign-bit representations,
    otherwise we could just use ((unsigned)-1) >> 1 */
-unsigned ZLIB_INTERNAL gz_intmax()
-{
+unsigned ZLIB_INTERNAL gz_intmax() {
     unsigned p, q;
 
     p = 1;
diff --git a/zlibWrapper/gzread.c b/zlibWrapper/gzread.c
index 584fad1eaa8..ed3c1782606 100644
--- a/zlibWrapper/gzread.c
+++ b/zlibWrapper/gzread.c
@@ -29,12 +29,8 @@ local z_size_t gz_read _Z_OF((gz_statep, voidp, z_size_t));
    state.state->fd, and update state.state->eof, state.state->err, and state.state->msg as appropriate.
    This function needs to loop on read(), since read() is not guaranteed to
    read the number of bytes requested, depending on the type of descriptor. */
-local int gz_load(state, buf, len, have)
-    gz_statep state;
-    unsigned char *buf;
-    unsigned len;
-    unsigned *have;
-{
+local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
+                  unsigned *have) {
     ssize_t ret;
     unsigned get, max = ((unsigned)-1 >> 2) + 1;
 
@@ -64,8 +60,7 @@ local int gz_load(state, buf, len, have)
    If strm->avail_in != 0, then the current data is moved to the beginning of
    the input buffer, and then the remainder of the buffer is loaded with the
    available data from the input file. */
-local int gz_avail(state)
-    gz_statep state;
+local int gz_avail(gz_statep state)
 {
     unsigned got;
     z_streamp strm = &(state.state->strm);
@@ -99,9 +94,7 @@ local int gz_avail(state)
    case, all further file reads will be directly to either the output buffer or
    a user buffer.  If decompressing, the inflate state will be initialized.
    gz_look() will return 0 on success or -1 on failure. */
-local int gz_look(state)
-    gz_statep state;
-{
+local int gz_look(gz_statep state) {
     z_streamp strm = &(state.state->strm);
 
     /* allocate read buffers and inflate memory */
@@ -184,9 +177,7 @@ local int gz_look(state)
    data.  If the gzip stream completes, state.state->how is reset to LOOK to look for
    the next gzip stream or raw data, once state.state->x.have is depleted.  Returns 0
    on success, -1 on failure. */
-local int gz_decomp(state)
-    gz_statep state;
-{
+local int gz_decomp(gz_statep state) {
     int ret = Z_OK;
     unsigned had;
     z_streamp strm = &(state.state->strm);
@@ -238,9 +229,7 @@ local int gz_decomp(state)
    looked for to determine whether to copy or decompress.  Returns -1 on error,
    otherwise 0.  gz_fetch() will leave state.state->how as COPY or GZIP unless the
    end of the input file has been reached and all data has been processed.  */
-local int gz_fetch(state)
-    gz_statep state;
-{
+local int gz_fetch(gz_statep state) {
     z_streamp strm = &(state.state->strm);
 
     do {
@@ -268,10 +257,7 @@ local int gz_fetch(state)
 }
 
 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
-local int gz_skip(state, len)
-    gz_statep state;
-    z_off64_t len;
-{
+local int gz_skip(gz_statep state, z_off64_t len) {
     unsigned n;
 
     /* skip over len bytes or reach end-of-file, whichever comes first */
@@ -303,11 +289,7 @@ local int gz_skip(state, len)
    input.  Return the number of bytes read.  If zero is returned, either the
    end of file was reached, or there was an error.  state.state->err must be
    consulted in that case to determine which. */
-local z_size_t gz_read(state, buf, len)
-    gz_statep state;
-    voidp buf;
-    z_size_t len;
-{
+local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
     z_size_t got;
     unsigned n;
 
@@ -384,11 +366,7 @@ local z_size_t gz_read(state, buf, len)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzread(file, buf, len)
-    gzFile file;
-    voidp buf;
-    unsigned len;
-{
+int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
     gz_statep state;
 
     /* get internal structure */
@@ -420,12 +398,8 @@ int ZEXPORT gzread(file, buf, len)
 }
 
 /* -- see zlib.h -- */
-z_size_t ZEXPORT gzfread(buf, size, nitems, file)
-    voidp buf;
-    z_size_t size;
-    z_size_t nitems;
-    gzFile file;
-{
+z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems,
+                         gzFile file) {
     z_size_t len;
     gz_statep state;
 
@@ -468,9 +442,7 @@ ZEXTERN int ZEXPORT gzgetc _Z_OF((gzFile file));
 ZEXTERN int ZEXPORT gzgetc_ _Z_OF((gzFile file));
 #endif
 
-int ZEXPORT gzgetc(file)
-    gzFile file;
-{
+int ZEXPORT gzgetc(gzFile file) {
     int ret;
     unsigned char buf[1];
     gz_statep state;
@@ -497,17 +469,12 @@ int ZEXPORT gzgetc(file)
     return ret < 1 ? -1 : buf[0];
 }
 
-int ZEXPORT gzgetc_(file)
-gzFile file;
-{
+int ZEXPORT gzgetc_(gzFile file) {
     return gzgetc(file);
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzungetc(c, file)
-    int c;
-    gzFile file;
-{
+int ZEXPORT gzungetc(int c, gzFile file) {
     gz_statep state;
 
     /* get internal structure */
@@ -564,11 +531,7 @@ int ZEXPORT gzungetc(c, file)
 }
 
 /* -- see zlib.h -- */
-char * ZEXPORT gzgets(file, buf, len)
-    gzFile file;
-    char *buf;
-    int len;
-{
+char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
     unsigned left, n;
     char *str;
     unsigned char *eol;
@@ -628,9 +591,7 @@ char * ZEXPORT gzgets(file, buf, len)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzdirect(file)
-    gzFile file;
-{
+int ZEXPORT gzdirect(gzFile file) {
     gz_statep state;
 
     /* get internal structure */
@@ -648,9 +609,7 @@ int ZEXPORT gzdirect(file)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzclose_r(file)
-    gzFile file;
-{
+int ZEXPORT gzclose_r(gzFile file) {
     int ret, err;
     gz_statep state;
 
diff --git a/zlibWrapper/gzwrite.c b/zlibWrapper/gzwrite.c
index ccd4f71f13a..81da15314a9 100644
--- a/zlibWrapper/gzwrite.c
+++ b/zlibWrapper/gzwrite.c
@@ -19,9 +19,7 @@ local z_size_t gz_write _Z_OF((gz_statep, voidpc, z_size_t));
 /* Initialize state for writing a gzip file.  Mark initialization by setting
    state.state->size to non-zero.  Return -1 on a memory allocation failure, or 0 on
    success. */
-local int gz_init(state)
-    gz_statep state;
-{
+local int gz_init(gz_statep state) {
     int ret;
     z_streamp strm = &(state.state->strm);
 
@@ -75,10 +73,7 @@ local int gz_init(state)
    deflate() flush value.  If flush is Z_FINISH, then the deflate() state is
    reset to start a new gzip stream.  If gz->direct is true, then simply write
    to the output file without compressing, and ignore flush. */
-local int gz_comp(state, flush)
-    gz_statep state;
-    int flush;
-{
+local int gz_comp(gz_statep state, int flush) {
     int ret, writ;
     unsigned have, put, max = ((unsigned)-1 >> 2) + 1;
     z_streamp strm = &(state.state->strm);
@@ -147,10 +142,7 @@ local int gz_comp(state, flush)
 
 /* Compress len zeros to output.  Return -1 on a write error or memory
    allocation failure by gz_comp(), or 0 on success. */
-local int gz_zero(state, len)
-    gz_statep state;
-    z_off64_t len;
-{
+local int gz_zero(gz_statep state, z_off64_t len) {
     int first;
     unsigned n;
     z_streamp strm = &(state.state->strm);
@@ -180,11 +172,7 @@ local int gz_zero(state, len)
 
 /* Write len bytes from buf to file.  Return the number of bytes written.  If
    the returned value is less than len, then there was an error. */
-local z_size_t gz_write(state, buf, len)
-    gz_statep state;
-    voidpc buf;
-    z_size_t len;
-{
+local z_size_t gz_write(gz_statep state, voidpc buf, z_size_t len) {
     z_size_t put = len;
 
     /* if len is zero, avoid unnecessary operations */
@@ -248,11 +236,7 @@ local z_size_t gz_write(state, buf, len)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzwrite(file, buf, len)
-    gzFile file;
-    voidpc buf;
-    unsigned len;
-{
+int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len) {
     gz_statep state;
 
     /* get internal structure */
@@ -276,12 +260,8 @@ int ZEXPORT gzwrite(file, buf, len)
 }
 
 /* -- see zlib.h -- */
-z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
-    voidpc buf;
-    z_size_t size;
-    z_size_t nitems;
-    gzFile file;
-{
+z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, z_size_t nitems,
+                          gzFile file) {
     z_size_t len;
     gz_statep state;
 
@@ -307,10 +287,7 @@ z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzputc(file, c)
-    gzFile file;
-    int c;
-{
+int ZEXPORT gzputc(gzFile file, int c) {
     unsigned have;
     unsigned char buf[1];
     gz_statep state;
@@ -355,10 +332,7 @@ int ZEXPORT gzputc(file, c)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzputs(file, str)
-    gzFile file;
-    const char *str;
-{
+int ZEXPORT gzputs(gzFile file, const char *str) {
     int ret;
     z_size_t len;
     gz_statep state;
@@ -382,8 +356,7 @@ int ZEXPORT gzputs(file, str)
 #include <stdarg.h>
 
 /* -- see zlib.h -- */
-int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
-{
+int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) {
     int len;
     unsigned left;
     char *next;
@@ -454,8 +427,7 @@ int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
     return len;
 }
 
-int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
-{
+int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) {
     va_list va;
     int ret;
 
@@ -468,13 +440,10 @@ int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
 #else /* !STDC && !Z_HAVE_STDARG_H */
 
 /* -- see zlib.h -- */
-int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-                       a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
-    gzFile file;
-    const char *format;
-    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
-{
+int ZEXPORTVA gzprintf(gzFile file, const char *format, int a1, int a2, int a3,
+                       int a4, int a5, int a6, int a7, int a8, int a9, int a10,
+                       int a11, int a12, int a13, int a14, int a15, int a16,
+                       int a17, int a18, int a19, int a20) {
     unsigned len, left;
     char *next;
     gz_statep state;
@@ -556,10 +525,7 @@ int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
 #endif
 
 /* -- see zlib.h -- */
-int ZEXPORT gzflush(file, flush)
-    gzFile file;
-    int flush;
-{
+int ZEXPORT gzflush(gzFile file, int flush) {
     gz_statep state;
 
     /* get internal structure */
@@ -588,11 +554,7 @@ int ZEXPORT gzflush(file, flush)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzsetparams(file, level, strategy)
-    gzFile file;
-    int level;
-    int strategy;
-{
+int ZEXPORT gzsetparams(gzFile file, int level, int strategy) {
     gz_statep state;
     z_streamp strm;
 
@@ -630,9 +592,7 @@ int ZEXPORT gzsetparams(file, level, strategy)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzclose_w(file)
-    gzFile file;
-{
+int ZEXPORT gzclose_w(gzFile file) {
     int ret = Z_OK;
     gz_statep state;
 

From 9a3b17c4d61f00e22997d946f422533564812fe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Chojnowski?= 
Date: Sat, 16 Dec 2023 12:32:52 +0100
Subject: [PATCH 134/283] Fix a nullptr dereference in
 ZSTD_createCDict_advanced2()

If the relevant allocation returns NULL, ZSTD_createCDict_advanced_internal()
will return NULL. But ZSTD_createCDict_advanced2() doesn't check for
this and attempts to use the returned pointer anyway, which leads to
a segfault.
---
 lib/compress/zstd_compress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index cdd763ff6cf..dc892dae7a4 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -5525,7 +5525,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2(
                         cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                         customMem);
 
-    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+    if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                     dict, dictSize,
                                     dictLoadMethod, dictContentType,
                                     cctxParams) )) {

From e515327764889938692dac3257a300df56a15e8f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 18 Dec 2023 05:26:39 +0000
Subject: [PATCH 135/283] Bump actions/upload-artifact from 3.1.3 to 4.0.0

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.3 to 4.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/a8a3f3ad30e3422c9c7b888a15615d19a852ae32...c7d193f32edcb7bfad88892161225aeda64e9392)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/dev-long-tests.yml    | 2 +-
 .github/workflows/scorecards.yml        | 2 +-
 .github/workflows/windows-artifacts.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index dbef98982d9..4a47c9c89b6 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -290,7 +290,7 @@ jobs:
         dry-run: false
         sanitizer: ${{ matrix.sanitizer }}
     - name: Upload Crash
-      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # tag=v3.1.3
+      uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v4.0.0
       if: failure() && steps.build.outcome == 'success'
       with:
         name: ${{ matrix.sanitizer }}-artifacts
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 7518519dcc3..2d698967d85 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -51,7 +51,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # tag=v3.1.3
+        uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v4.0.0
         with:
           name: SARIF file
           path: results.sarif
diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml
index 70c2d163b0e..2300d09bf30 100644
--- a/.github/workflows/windows-artifacts.yml
+++ b/.github/workflows/windows-artifacts.yml
@@ -52,7 +52,7 @@ jobs:
         cd ..
 
     - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip
-      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # tag=v3
+      uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v4.0.0
       with:
         path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
         name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip

From 8052cd0131a4f483ef14be3e564530c07ea382f5 Mon Sep 17 00:00:00 2001
From: Ruslan Sayfutdinov 
Date: Sat, 16 Dec 2023 15:28:19 +0000
Subject: [PATCH 136/283] cli: better errors on argument parsing

---
 programs/zstdcli.c                        | 48 +++++++++++++----------
 tests/cli-tests/basic/args.sh             | 10 +++++
 tests/cli-tests/basic/args.sh.exit        |  1 +
 tests/cli-tests/basic/args.sh.stderr.glob | 28 +++++++++++++
 4 files changed, 66 insertions(+), 21 deletions(-)
 create mode 100755 tests/cli-tests/basic/args.sh
 create mode 100644 tests/cli-tests/basic/args.sh.exit
 create mode 100644 tests/cli-tests/basic/args.sh.stderr.glob

diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 66952aa8293..3f0ae8bdd21 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -138,8 +138,8 @@ static int exeNameMatch(const char* exeName, const char* test)
 *  Command Line
 **************************************/
 /* print help either in `stderr` or `stdout` depending on originating request
- * error (badusage) => stderr
- * help (usage_advanced) => stdout
+ * error (badUsage) => stderr
+ * help (usageAdvanced) => stdout
  */
 static void usage(FILE* f, const char* programName)
 {
@@ -175,7 +175,7 @@ static void usage(FILE* f, const char* programName)
     DISPLAY_F(f, "\n");
 }
 
-static void usage_advanced(const char* programName)
+static void usageAdvanced(const char* programName)
 {
     DISPLAYOUT(WELCOME_MESSAGE);
     DISPLAYOUT("\n");
@@ -316,9 +316,9 @@ static void usage_advanced(const char* programName)
 
 }
 
-static void badusage(const char* programName)
+static void badUsage(const char* programName, const char* parameter)
 {
-    DISPLAYLEVEL(1, "Incorrect parameters \n");
+    DISPLAYLEVEL(1, "Incorrect parameter: %s\n", parameter);
     if (g_displayLevel >= 2) usage(stderr, programName);
 }
 
@@ -589,7 +589,7 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void)
 
 
 /** parseAdaptParameters() :
- *  reads adapt parameters from *stringPtr (e.g. "--zstd=min=1,max=19) and store them into adaptMinPtr and adaptMaxPtr.
+ *  reads adapt parameters from *stringPtr (e.g. "--adapt=min=1,max=19) and store them into adaptMinPtr and adaptMaxPtr.
  *  Both adaptMinPtr and adaptMaxPtr must be already allocated and correctly initialized.
  *  There is no guarantee that any of these values will be updated.
  *  @return 1 means that parsing was successful,
@@ -933,6 +933,7 @@ int main(int argCount, const char* argv[])
     /* command switches */
     for (argNb=1; argNb&2
+zstd --blah
+println "+ zstd -xz" >&2
+zstd -xz
+println "+ zstd --adapt=min=1,maxx=2 file.txt" >&2
+zstd --adapt=min=1,maxx=2 file.txt
+println "+ zstd --train-cover=k=48,d=8,steps32 file.txt" >&2
+zstd --train-cover=k=48,d=8,steps32 file.txt
diff --git a/tests/cli-tests/basic/args.sh.exit b/tests/cli-tests/basic/args.sh.exit
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/cli-tests/basic/args.sh.exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cli-tests/basic/args.sh.stderr.glob b/tests/cli-tests/basic/args.sh.stderr.glob
new file mode 100644
index 00000000000..df275471ca8
--- /dev/null
+++ b/tests/cli-tests/basic/args.sh.stderr.glob
@@ -0,0 +1,28 @@
++ zstd --blah
+Incorrect parameter: --blah
+...
+Usage: zstd *
+
+Options:
+...
++ zstd -xz
+Incorrect parameter: -x
+...
+Usage: zstd *
+
+Options:
+...
++ zstd --adapt=min=1,maxx=2 file.txt
+Incorrect parameter: --adapt=min=1,maxx=2
+...
+Usage: zstd *
+
+Options:
+...
++ zstd --train-cover=k=48,d=8,steps32 file.txt
+Incorrect parameter: --train-cover=k=48,d=8,steps32
+...
+Usage: zstd *
+
+Options:
+...

From 377ecefce93d3a8705cb54c553681ff234bd9f34 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 18 Dec 2023 09:39:42 -0800
Subject: [PATCH 137/283] Update windows-artifacts.yml

and fixed version number in comment
---
 .github/workflows/windows-artifacts.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml
index 2300d09bf30..3a2bac7a453 100644
--- a/.github/workflows/windows-artifacts.yml
+++ b/.github/workflows/windows-artifacts.yml
@@ -52,7 +52,7 @@ jobs:
         cd ..
 
     - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip
-      uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v3
+      uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v4.0.0
       with:
         path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
         name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip

From 475da4fb2e2aef102edecba04278b38fce44fb81 Mon Sep 17 00:00:00 2001
From: Theodore Tsirpanis 
Date: Tue, 19 Dec 2023 15:06:03 +0200
Subject: [PATCH 138/283] Make `zstd::libzstd` an imported interface target. It
 cannot be an alias, because it would lock the package to use either static or
 shared libraries at its build time. We want to decide this at the time
 `find_package` is called.

---
 build/cmake/zstdConfig.cmake.in | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/build/cmake/zstdConfig.cmake.in b/build/cmake/zstdConfig.cmake.in
index 752f3ab11ce..0a7f773d75f 100644
--- a/build/cmake/zstdConfig.cmake.in
+++ b/build/cmake/zstdConfig.cmake.in
@@ -7,10 +7,18 @@ endif()
 
 include("${CMAKE_CURRENT_LIST_DIR}/zstdTargets.cmake")
 
-if(@ZSTD_BUILD_SHARED@ AND NOT @ZSTD_BUILD_STATIC@)
-  add_library(zstd::libzstd ALIAS zstd::libzstd_shared)
-elseif(NOT @ZSTD_BUILD_SHARED@ AND @ZSTD_BUILD_STATIC@)
-  add_library(zstd::libzstd ALIAS zstd::libzstd_static)
+if(NOT TARGET zstd::libzstd)
+  if(@ZSTD_BUILD_SHARED@ AND NOT @ZSTD_BUILD_STATIC@)
+    add_library(zstd::libzstd INTERFACE IMPORTED)
+    set_target_properties(zstd::libzstd PROPERTIES
+      INTERFACE_LINK_LIBRARIES "zstd::libzstd_shared"
+    )
+  elseif(NOT @ZSTD_BUILD_SHARED@ AND @ZSTD_BUILD_STATIC@)
+    add_library(zstd::libzstd INTERFACE IMPORTED)
+    set_target_properties(zstd::libzstd PROPERTIES
+      INTERFACE_LINK_LIBRARIES "zstd::libzstd_static"
+    )
+  endif()
 endif()
 
 check_required_components("zstd")

From c6cabf94417d84ebb5da62e05d8b8a9623763585 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky 
Date: Thu, 28 Dec 2023 14:48:46 -0500
Subject: [PATCH 139/283] Make offload API compatible with static CCtx (#3854)

* Add ZSTD_CCtxParams_registerSequenceProducer() to public API

* add unit test

* add docs to zstd.h

* nits

* Add ZSTDLIB_STATIC_API prefix

* Add asserts
---
 lib/compress/zstd_compress.c | 23 +++++++++++----
 lib/zstd.h                   | 19 +++++++++++--
 tests/zstreamtest.c          | 54 +++++++++++++++++++++++++++++++++++-
 3 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index cdd763ff6cf..55415c7e3fb 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -7084,14 +7084,27 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
 }
 
 void ZSTD_registerSequenceProducer(
-    ZSTD_CCtx* zc, void* extSeqProdState,
+    ZSTD_CCtx* zc,
+    void* extSeqProdState,
     ZSTD_sequenceProducer_F extSeqProdFunc
 ) {
+    assert(zc != NULL);
+    ZSTD_CCtxParams_registerSequenceProducer(
+        &zc->requestedParams, extSeqProdState, extSeqProdFunc
+    );
+}
+
+void ZSTD_CCtxParams_registerSequenceProducer(
+  ZSTD_CCtx_params* params,
+  void* extSeqProdState,
+  ZSTD_sequenceProducer_F extSeqProdFunc
+) {
+    assert(params != NULL);
     if (extSeqProdFunc != NULL) {
-        zc->requestedParams.extSeqProdFunc = extSeqProdFunc;
-        zc->requestedParams.extSeqProdState = extSeqProdState;
+        params->extSeqProdFunc = extSeqProdFunc;
+        params->extSeqProdState = extSeqProdState;
     } else {
-        zc->requestedParams.extSeqProdFunc = NULL;
-        zc->requestedParams.extSeqProdState = NULL;
+        params->extSeqProdFunc = NULL;
+        params->extSeqProdState = NULL;
     }
 }
diff --git a/lib/zstd.h b/lib/zstd.h
index 61f81db0f25..84126930520 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1665,9 +1665,6 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
  *
  *  Note : only single-threaded compression is supported.
  *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
- *
- *  Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
- *  Size estimates assume that no external sequence producer is registered.
  */
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
@@ -2824,6 +2821,22 @@ ZSTD_registerSequenceProducer(
   ZSTD_sequenceProducer_F sequenceProducer
 );
 
+/*! ZSTD_CCtxParams_registerSequenceProducer() :
+ * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params.
+ * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(),
+ * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx().
+ *
+ * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx()
+ * is required, then this function is for you. Otherwise, you probably don't need it.
+ *
+ * See tests/zstreamtest.c for example usage. */
+ZSTDLIB_STATIC_API void
+ZSTD_CCtxParams_registerSequenceProducer(
+  ZSTD_CCtx_params* params,
+  void* sequenceProducerState,
+  ZSTD_sequenceProducer_F sequenceProducer
+);
+
 
 /*********************************************************************
 *  Buffer-less and synchronous inner streaming functions (DEPRECATED)
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 04f1f8b0e9c..82aaf3db50c 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1920,7 +1920,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
     DISPLAYLEVEL(3, "test%3i : Block-Level External Sequence Producer API: ", testNb++);
     {
         size_t const dstBufSize = ZSTD_compressBound(CNBufferSize);
-        BYTE* const dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize));
+        BYTE* const dstBuf = (BYTE*)malloc(dstBufSize);
         size_t const checkBufSize = CNBufferSize;
         BYTE* const checkBuf = (BYTE*)malloc(checkBufSize);
         int enableFallback;
@@ -2356,6 +2356,58 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : Testing external sequence producer with static CCtx: ", testNb++);
+    {
+        size_t const dstBufSize = ZSTD_compressBound(CNBufferSize);
+        BYTE* const dstBuf = (BYTE*)malloc(dstBufSize);
+        size_t const checkBufSize = CNBufferSize;
+        BYTE* const checkBuf = (BYTE*)malloc(checkBufSize);
+        ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
+        ZSTD_CCtx* staticCCtx;
+        void* cctxBuf;
+        EMF_testCase seqProdState;
+
+        CHECK_Z(ZSTD_CCtxParams_setParameter(params, ZSTD_c_validateSequences, 1));
+        CHECK_Z(ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableSeqProducerFallback, 0));
+        ZSTD_CCtxParams_registerSequenceProducer(params, &seqProdState, zstreamSequenceProducer);
+
+        {
+            size_t const cctxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
+            cctxBuf = malloc(cctxSize);
+            staticCCtx = ZSTD_initStaticCCtx(cctxBuf, cctxSize);
+            ZSTD_CCtx_setParametersUsingCCtxParams(staticCCtx, params);
+        }
+
+        // Check that compression with external sequence producer succeeds when expected
+        seqProdState = EMF_LOTS_OF_SEQS;
+        {
+            size_t dResult;
+            size_t const cResult = ZSTD_compress2(staticCCtx, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+            CHECK(ZSTD_isError(cResult), "EMF: Compression error: %s", ZSTD_getErrorName(cResult));
+            dResult = ZSTD_decompress(checkBuf, checkBufSize, dstBuf, cResult);
+            CHECK(ZSTD_isError(dResult), "EMF: Decompression error: %s", ZSTD_getErrorName(dResult));
+            CHECK(dResult != CNBufferSize, "EMF: Corruption!");
+            CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
+        }
+
+        // Check that compression with external sequence producer fails when expected
+        seqProdState = EMF_BIG_ERROR;
+        {
+            size_t const cResult = ZSTD_compress2(staticCCtx, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+            CHECK(!ZSTD_isError(cResult), "EMF: Should have raised an error!");
+            CHECK(
+                ZSTD_getErrorCode(cResult) != ZSTD_error_sequenceProducer_failed,
+                "EMF: Wrong error code: %s", ZSTD_getErrorName(cResult)
+            );
+        }
+
+        free(dstBuf);
+        free(checkBuf);
+        free(cctxBuf);
+        ZSTD_freeCCtxParams(params);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
 _end:
     FUZ_freeDictionary(dictionary);
     ZSTD_freeCStream(zc);

From f06b18b3ff009ef7dc90294fca674658ddf139bf Mon Sep 17 00:00:00 2001
From: elasota <1137273+elasota@users.noreply.github.com>
Date: Sun, 19 Nov 2023 15:33:37 -0500
Subject: [PATCH 140/283] Specify offset 0 as invalid

---
 doc/decompressor_accepted_invalid_data.md | 14 ++++++++++++++
 doc/zstd_compression_format.md            |  5 ++++-
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 doc/decompressor_accepted_invalid_data.md

diff --git a/doc/decompressor_accepted_invalid_data.md b/doc/decompressor_accepted_invalid_data.md
new file mode 100644
index 00000000000..f08f963d93e
--- /dev/null
+++ b/doc/decompressor_accepted_invalid_data.md
@@ -0,0 +1,14 @@
+Decompressor Accepted Invalid Data
+==================================
+
+This document describes the behavior of the reference decompressor in cases
+where it accepts an invalid frame instead of reporting an error.
+
+Zero offsets converted to 1
+---------------------------
+If a sequence is decoded with `literals_length = 0` and `offset_value = 3`
+while `Repeated_Offset1 = 1`, the computed offset will be `0`, which is
+invalid.
+
+The reference decompressor will process this case as if the computed
+offset was `1`, including inserting `1` into the repeated offset list.
\ No newline at end of file
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 4a8d338b7a6..a8d4a0f35f5 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -929,7 +929,10 @@ There is an exception though, when current sequence's `literals_length = 0`.
 In this case, repeated offsets are shifted by one,
 so an `offset_value` of 1 means `Repeated_Offset2`,
 an `offset_value` of 2 means `Repeated_Offset3`,
-and an `offset_value` of 3 means `Repeated_Offset1 - 1_byte`.
+and an `offset_value` of 3 means `Repeated_Offset1 - 1`.
+
+In the final case, if `Repeated_Offset1 - 1` evaluates to 0, then the
+data is considered corrupted.
 
 For the first block, the starting offset history is populated with following values :
 `Repeated_Offset1`=1, `Repeated_Offset2`=4, `Repeated_Offset3`=8,

From a52d897d6044029a904ab0d6893f6e73eec87d18 Mon Sep 17 00:00:00 2001
From: Ed Maste 
Date: Wed, 13 Dec 2023 19:56:14 -0500
Subject: [PATCH 141/283] Cirrus-CI: Add FreeBSD 14

---
 .cirrus.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.cirrus.yml b/.cirrus.yml
index 2e5e18c01f2..bf3f0c415d2 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -2,6 +2,7 @@ task:
   name: FreeBSD (shortest)
   freebsd_instance:
     matrix:
+      image_family: freebsd-14-0
       image_family: freebsd-13-2
   install_script: pkg install -y gmake coreutils
   script: |

From 923cf3dc9289d00b668cd0a330d5c28f22d4837f Mon Sep 17 00:00:00 2001
From: Eli Schwartz 
Date: Tue, 2 Jan 2024 01:36:45 -0500
Subject: [PATCH 142/283] CI: meson: use builtin handling for MSVC

This avoids downloading -- and periodically bumping the checksum for --
a third-party action that isn't strictly required, and thus helps keep
down dependencies and reduce update churn.
---
 .github/workflows/dev-short-tests.yml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 87de8394a9e..65e9ae655f9 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -63,7 +63,7 @@ jobs:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: build zstd bin against a dynamic lib (debuglevel for more dependencies)
       run: |
-        make -C lib lib-mt-release 
+        make -C lib lib-mt-release
         DEBUGLEVEL=2 make -C programs zstd-dll
 
   gcc-7-libzstd:
@@ -246,14 +246,12 @@ jobs:
       - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
       - name: Install packages
         run: pip install --pre meson
-      - name: Initialize the MSVC dev command prompt
-        uses: ilammy/msvc-dev-cmd@cec98b9d092141f74527d0afa6feb2af698cfe89
       - name: Configure with Meson
         run: |
-          meson setup build/meson/ builddir -Dbin_tests=true -Dbin_programs=true -Dbin_contrib=true
+          meson setup --vsenv build/meson/ builddir -Dbin_tests=true -Dbin_programs=true -Dbin_contrib=true
       - name: Build with Meson
         run: |
-          ninja -C builddir/
+          meson compile -C builddir/
       - name: Test with Meson
         run: |
           meson test -C builddir/ --print-errorlogs

From 66269e74a00e531a5f27fcb4fd65eb061d02dc5c Mon Sep 17 00:00:00 2001
From: Like Ma 
Date: Sat, 13 Jan 2024 15:36:20 +0800
Subject: [PATCH 143/283] Fix building xxhash on AIX 5.1

---
 lib/common/xxhash.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index 424ed19b8a0..449bd5f08e5 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -575,7 +575,11 @@ typedef uint32_t XXH32_hash_t;
 #elif !defined (__VMS) \
   && (defined (__cplusplus) \
   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   include <stdint.h>
+#   ifdef _AIX
+#     include <inttypes.h>
+#   else
+#     include <stdint.h>
+#   endif
     typedef uint32_t XXH32_hash_t;
 
 #else
@@ -849,7 +853,11 @@ typedef uint64_t XXH64_hash_t;
 #elif !defined (__VMS) \
   && (defined (__cplusplus) \
   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#  include <stdint.h>
+#   ifdef _AIX
+#     include <inttypes.h>
+#   else
+#     include <stdint.h>
+#   endif
    typedef uint64_t XXH64_hash_t;
 #else
 #  include <limits.h>
@@ -2465,7 +2473,11 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
 #if !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# ifdef _AIX
+#   include <inttypes.h>
+# else
+#   include <stdint.h>
+# endif
   typedef uint8_t xxh_u8;
 #else
   typedef unsigned char xxh_u8;

From e49d1ab6aabcd662b76a46ef48391a5462357167 Mon Sep 17 00:00:00 2001
From: Like Ma 
Date: Sat, 13 Jan 2024 15:21:50 +0800
Subject: [PATCH 144/283] Fix building on HP-UX 11.11 PA-RISC

By CMake 3.8.2 and GCC 4.7.1
---
 build/cmake/CMakeLists.txt | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt
index 023b998f556..32f5fd6efb3 100644
--- a/build/cmake/CMakeLists.txt
+++ b/build/cmake/CMakeLists.txt
@@ -115,9 +115,20 @@ endif ()
 # External dependencies
 #-----------------------------------------------------------------------------
 if (ZSTD_MULTITHREAD_SUPPORT AND UNIX)
-    set(THREADS_PREFER_PTHREAD_FLAG ON)
-    find_package(Threads REQUIRED)
-    if(CMAKE_USE_PTHREADS_INIT)
+    if (CMAKE_SYSTEM_NAME MATCHES "HP-UX")
+        find_package(Threads)
+        if (NOT Threads_FOUND)
+            set(CMAKE_USE_PTHREADS_INIT 1)
+            set(CMAKE_THREAD_LIBS_INIT -lpthread)
+            set(CMAKE_HAVE_THREADS_LIBRARY 1)
+            set(Threads_FOUND TRUE)
+        endif ()
+    else ()
+        set(THREADS_PREFER_PTHREAD_FLAG ON)
+        find_package(Threads REQUIRED)
+    endif ()
+
+    if (CMAKE_USE_PTHREADS_INIT)
         set(THREADS_LIBS "${CMAKE_THREAD_LIBS_INIT}")
     else()
         message(SEND_ERROR "ZSTD currently does not support thread libraries other than pthreads")

From 3a2e302b2ca25eadca7d1952119837be70b2b8b2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Jan 2024 05:24:41 +0000
Subject: [PATCH 145/283] Bump github/codeql-action from 2.21.4 to 3.23.0

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.21.4 to 3.23.0.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/a09933a12a80f87b87005513f0abb1494c27a716...e5f05b81d5b6ff8cfa111c80c22c5fd02a384118)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/scorecards.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 2d698967d85..c612739024b 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -59,6 +59,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@a09933a12a80f87b87005513f0abb1494c27a716 # tag=v2.21.4
+        uses: github/codeql-action/upload-sarif@e5f05b81d5b6ff8cfa111c80c22c5fd02a384118 # tag=v3.23.0
         with:
           sarif_file: results.sarif

From e2fe26627907274f04b9ad7dabf41a3243547a1d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Jan 2024 05:24:46 +0000
Subject: [PATCH 146/283] Bump actions/upload-artifact from 4.0.0 to 4.1.0

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.0.0 to 4.1.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/c7d193f32edcb7bfad88892161225aeda64e9392...1eb3cb2b3e0f29609092a73eb033bb759a334595)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/dev-long-tests.yml    | 2 +-
 .github/workflows/scorecards.yml        | 2 +-
 .github/workflows/windows-artifacts.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index 4a47c9c89b6..2f3dd6a27ef 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -290,7 +290,7 @@ jobs:
         dry-run: false
         sanitizer: ${{ matrix.sanitizer }}
     - name: Upload Crash
-      uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v4.0.0
+      uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # tag=v4.1.0
       if: failure() && steps.build.outcome == 'success'
       with:
         name: ${{ matrix.sanitizer }}-artifacts
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 2d698967d85..f2254ede0d6 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -51,7 +51,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v4.0.0
+        uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # tag=v4.1.0
         with:
           name: SARIF file
           path: results.sarif
diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml
index 3a2bac7a453..10bef46dbfc 100644
--- a/.github/workflows/windows-artifacts.yml
+++ b/.github/workflows/windows-artifacts.yml
@@ -52,7 +52,7 @@ jobs:
         cd ..
 
     - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip
-      uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v4.0.0
+      uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # tag=v4.1.0
       with:
         path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
         name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip

From e6f4b464938008c4f800a26027248a00db5c81c8 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 15 Jan 2024 11:16:46 -0800
Subject: [PATCH 147/283] playTests.sh no longer needs grep -E

This makes the test script more portable across POSIX systems,
because `grep -E` is not guaranteed to be available,
while plain `grep` is fairly common.
---
 lib/common/pool.c         |  2 +-
 programs/fileio_asyncio.c |  4 +--
 tests/playTests.sh        | 66 +++++++++++++++++++--------------------
 3 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/lib/common/pool.c b/lib/common/pool.c
index d5ca5a7808a..3adcefc9a50 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -223,7 +223,7 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
     {   ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
         if (!threadPool) return 1;
         /* replace existing thread pool */
-        ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
+        ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t));
         ZSTD_customFree(ctx->threads, ctx->customMem);
         ctx->threads = threadPool;
         /* Initialize additional threads */
diff --git a/programs/fileio_asyncio.c b/programs/fileio_asyncio.c
index dbf0c756414..5f7bd4a4ce1 100644
--- a/programs/fileio_asyncio.c
+++ b/programs/fileio_asyncio.c
@@ -453,8 +453,8 @@ static IOJob_t* AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ReadPoolCt
 /* AIO_ReadPool_numReadsInFlight:
  * Returns the number of IO read jobs currently in flight. */
 static size_t AIO_ReadPool_numReadsInFlight(ReadPoolCtx_t* ctx) {
-    const size_t jobsHeld = (ctx->currentJobHeld==NULL ? 0 : 1);
-    return ctx->base.totalIoJobs - (ctx->base.availableJobsCount + ctx->completedJobsCount + jobsHeld);
+    const int jobsHeld = (ctx->currentJobHeld==NULL ? 0 : 1);
+    return (size_t)(ctx->base.totalIoJobs - (ctx->base.availableJobsCount + ctx->completedJobsCount + jobsHeld));
 }
 
 /* AIO_ReadPool_getNextCompletedJob:
diff --git a/tests/playTests.sh b/tests/playTests.sh
index bdbd00142cb..0e07ebb1904 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -91,7 +91,13 @@ fi
 SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
 PRGDIR="$SCRIPT_DIR/../programs"
 TESTDIR="$SCRIPT_DIR/../tests"
-UNAME=$(uname)
+UNAME=${UNAME:-$(uname)}
+GREP=${GREP:-grep}
+
+case "$UNAME" in
+  SunOS) DIFF=${DIFF:-gdiff} ;;
+  *) DIFF=${DIFF:-diff} ;;
+esac
 
 detectedTerminal=false
 if [ -t 0 ] && [ -t 1 ]
@@ -151,11 +157,6 @@ assertSamePermissions() {
     [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match those on $2 ($STAT1 != $STAT2)"
 }
 
-DIFF="diff"
-case "$UNAME" in
-  SunOS) DIFF="gdiff" ;;
-esac
-
 
 # check if ZSTD_BIN is defined. if not, use the default value
 if [ -z "${ZSTD_BIN}" ]; then
@@ -177,7 +178,7 @@ fi
 [ -n "$DATAGEN_BIN" ] || die "datagen not found at $DATAGEN_BIN! \n Please define DATAGEN_BIN pointing to the datagen binary. You might also consider rebuilding zstd tests following the instructions in README.md. "
 println "\nStarting playTests.sh isWindows=$isWindows EXE_PREFIX='$EXE_PREFIX' ZSTD_BIN='$ZSTD_BIN' DATAGEN_BIN='$DATAGEN_BIN'"
 
-if echo hello | zstd -v -T2 2>&1 > $INTOVOID | grep -q 'multi-threading is disabled'
+if echo hello | zstd -v -T2 2>&1 > $INTOVOID | $GREP -q 'multi-threading is disabled'
 then
     hasMT=""
 else
@@ -253,8 +254,8 @@ println "test : null-length file roundtrip"
 println -n '' | zstd - --stdout | zstd -d --stdout
 println "test : ensure small file doesn't add 3-bytes null block"
 datagen -g1 > tmp1
-zstd tmp1 -c | wc -c | grep "14"
-zstd < tmp1  | wc -c | grep "14"
+zstd tmp1 -c | wc -c | $GREP "14"
+zstd < tmp1  | wc -c | $GREP "14"
 println "test : decompress file with wrong suffix (must fail)"
 zstd -d tmpCompressed && die "wrong suffix error not detected!"
 zstd -df tmp && die "should have refused : wrong extension"
@@ -291,9 +292,9 @@ println "test: --no-progress flag"
 zstd tmpro -c --no-progress | zstd -d -f -o "$INTOVOID" --no-progress
 zstd tmpro -cv --no-progress | zstd -dv -f -o "$INTOVOID" --no-progress
 println "test: --progress flag"
-zstd tmpro -c | zstd -d -f -o "$INTOVOID" --progress 2>&1 | grep -E "[A-Za-z0-9._ ]+: [0-9]+ bytes"
-zstd tmpro -c | zstd -d -f -q -o "$INTOVOID" --progress 2>&1 | grep -E "[A-Za-z0-9._ ]+: [0-9]+ bytes"
-zstd tmpro -c | zstd -d -f -v -o "$INTOVOID" 2>&1 | grep -E "[A-Za-z0-9._ ]+: [0-9]+ bytes"
+zstd tmpro -c | zstd -d -f -o "$INTOVOID" --progress 2>&1 | $GREP "[A-Za-z0-9._ ]\+: [0-9]\+ bytes"
+zstd tmpro -c | zstd -d -f -q -o "$INTOVOID" --progress 2>&1 | $GREP "[A-Za-z0-9._ ]\+: [0-9]\+ bytes"
+zstd tmpro -c | zstd -d -f -v -o "$INTOVOID" 2>&1 | $GREP "[A-Za-z0-9._ ]\+: [0-9]\+ bytes"
 rm -f tmpro tmpro.zst
 println "test: overwrite input file (must fail)"
 zstd tmp -fo tmp && die "zstd compression overwrote the input file"
@@ -320,7 +321,7 @@ zstd -d -f tmp.zst --no-check
 if [ "$isWindows" = false ] && [ "$UNAME" != "AIX" ]; then
   if [ -n "$(which readelf)" ]; then
     println "test: check if binary has executable stack (#2963)"
-    readelf -lW "$ZSTD_BIN" | grep 'GNU_STACK .* RW ' || die "zstd binary has executable stack!"
+    readelf -lW "$ZSTD_BIN" | $GREP 'GNU_STACK .* RW ' || die "zstd binary has executable stack!"
   fi
 fi
 
@@ -413,9 +414,9 @@ zstd -q tmp tmp2 -o tmp3.zst --rm && die "should refuse to concatenate"
 println "test : should quietly not remove non-regular file"
 println hello > tmp
 zstd tmp -f -o "$DEVDEVICE" 2>tmplog > "$INTOVOID"
-grep "Refusing to remove non-regular file" tmplog && die
+$GREP "Refusing to remove non-regular file" tmplog && die
 rm -f tmplog
-zstd tmp -f -o "$INTOVOID" 2>&1 | grep "Refusing to remove non-regular file" && die
+zstd tmp -f -o "$INTOVOID" 2>&1 | $GREP "Refusing to remove non-regular file" && die
 println "test : --rm on stdin"
 println a | zstd --rm > $INTOVOID   # --rm should remain silent
 rm -f tmp
@@ -615,7 +616,7 @@ if [ -n "$DEVNULLRIGHTS" ] ; then
     zstd tmp -f -o tmp.zst
     sudoZstd -d tmp.zst -c > $INTOVOID
     sudoZstd -d tmp.zst -o $INTOVOID
-    ls -las $INTOVOID | grep "rw-rw-rw-"
+    ls -las $INTOVOID | $GREP "rw-rw-rw-"
 fi
 
 if [ -n "$READFROMBLOCKDEVICE" ] ; then
@@ -625,7 +626,7 @@ if [ -n "$READFROMBLOCKDEVICE" ] ; then
     println "\n===> checking that zstd can read from a block device"
     datagen -g65536 > tmp.img
     sudo losetup -fP tmp.img
-    LOOP_DEV=$(losetup -a | grep 'tmp\.img' | cut -f1 -d:)
+    LOOP_DEV=$(losetup -a | $GREP 'tmp\.img' | cut -f1 -d:)
     [ -z "$LOOP_DEV" ] && die "failed to get loopback device"
     sudoZstd $LOOP_DEV -c > tmp.img.zst && die "should fail without -f"
     sudoZstd -f $LOOP_DEV -c > tmp.img.zst
@@ -774,13 +775,13 @@ println "\n===> --[no-]content-size tests"
 
 datagen > tmp_contentsize
 zstd -f tmp_contentsize
-zstd -lv tmp_contentsize.zst | grep "Decompressed Size:"
+zstd -lv tmp_contentsize.zst | $GREP "Decompressed Size:"
 zstd -f --no-content-size tmp_contentsize
-zstd -lv tmp_contentsize.zst | grep "Decompressed Size:" && die
+zstd -lv tmp_contentsize.zst | $GREP "Decompressed Size:" && die
 zstd -f --content-size tmp_contentsize
-zstd -lv tmp_contentsize.zst | grep "Decompressed Size:"
+zstd -lv tmp_contentsize.zst | $GREP "Decompressed Size:"
 zstd -f --content-size --no-content-size tmp_contentsize
-zstd -lv tmp_contentsize.zst | grep "Decompressed Size:" && die
+zstd -lv tmp_contentsize.zst | $GREP "Decompressed Size:" && die
 rm -rf tmp*
 
 println "test : show-default-cparams regular"
@@ -800,8 +801,7 @@ rm -rf tmp*
 println "test : show compression parameters in verbose mode"
 datagen > tmp
 zstd -vv tmp 2>&1 | \
-grep -q -E -- "--zstd=wlog=[[:digit:]]+,clog=[[:digit:]]+,hlog=[[:digit:]]+,\
-slog=[[:digit:]]+,mml=[[:digit:]]+,tlen=[[:digit:]]+,strat=[[:digit:]]+"
+$GREP -q -- "--zstd=wlog=[0-9]\+,clog=[0-9]\+,hlog=[0-9]\+,slog=[0-9]\+,mml=[0-9]\+,tlen=[0-9]\+,strat=[0-9]\+"
 rm -rf tmp*
 
 println "\n===>  Advanced compression parameters "
@@ -1098,8 +1098,8 @@ println "- Test --memory for dictionary compression"
 datagen -g12M -P90 > tmpCorpusHighCompress
 zstd --train -B2K tmpCorpusHighCompress -o tmpDictHighCompress --memory=10K && die "Dictionary training should fail : --memory too low (10K)"
 zstd --train -B2K tmpCorpusHighCompress -o tmpDictHighCompress --memory=5MB 2> zstTrainWithMemLimitStdErr
-cat zstTrainWithMemLimitStdErr | grep "setting manual memory limit for dictionary training data at 5 MB"
-cat zstTrainWithMemLimitStdErr | grep "Training samples set too large (12 MB); training on 5 MB only..."
+cat zstTrainWithMemLimitStdErr | $GREP "setting manual memory limit for dictionary training data at 5 MB"
+cat zstTrainWithMemLimitStdErr | $GREP "Training samples set too large (12 MB); training on 5 MB only..."
 rm zstTrainWithMemLimitStdErr
 
 println "\n===>  fastCover dictionary builder : advanced options "
@@ -1385,16 +1385,16 @@ println "\n===> suffix list test"
 ! zstd -d tmp.abc 2> tmplg
 
 if [ $GZIPMODE -ne 1 ]; then
-    grep ".gz" tmplg > $INTOVOID && die "Unsupported suffix listed"
+    $GREP ".gz" tmplg > $INTOVOID && die "Unsupported suffix listed"
 fi
 
 if [ $LZMAMODE -ne 1 ]; then
-    grep ".lzma" tmplg > $INTOVOID && die "Unsupported suffix listed"
-    grep ".xz" tmplg > $INTOVOID && die "Unsupported suffix listed"
+    $GREP ".lzma" tmplg > $INTOVOID && die "Unsupported suffix listed"
+    $GREP ".xz" tmplg > $INTOVOID && die "Unsupported suffix listed"
 fi
 
 if [ $LZ4MODE -ne 1 ]; then
-    grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
+    $GREP ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
 fi
 
 touch tmp1
@@ -1523,7 +1523,7 @@ datagen > tmp2
 datagen > tmp3
 zstd tmp*
 zstd -l ./*.zst
-zstd -lv ./*.zst | grep "Decompressed Size:"  # check that decompressed size is present in header
+zstd -lv ./*.zst | $GREP "Decompressed Size:"  # check that decompressed size is present in header
 zstd --list ./*.zst
 zstd --list -v ./*.zst
 
@@ -1566,13 +1566,13 @@ datagen -g0 > tmp5
 zstd tmp5
 zstd -l tmp5.zst
 zstd -l tmp5* && die "-l must fail on non-zstd file"
-zstd -lv tmp5.zst | grep "Decompressed Size: 0 B (0 B)"  # check that 0 size is present in header
+zstd -lv tmp5.zst | $GREP "Decompressed Size: 0 B (0 B)"  # check that 0 size is present in header
 zstd -lv tmp5* && die "-l must fail on non-zstd file"
 
 println "\n===>  zstd --list/-l test with no content size field "
 datagen -g513K | zstd > tmp6.zst
 zstd -l tmp6.zst
-zstd -lv tmp6.zst | grep "Decompressed Size:"  && die "Field :Decompressed Size: should not be available in this compressed file"
+zstd -lv tmp6.zst | $GREP "Decompressed Size:"  && die "Field :Decompressed Size: should not be available in this compressed file"
 
 println "\n===>   zstd --list/-l test with no checksum "
 zstd -f --no-check tmp1
@@ -1723,7 +1723,7 @@ else
     datagen -g5000000 > tmp_dict
     datagen -g5000000 > tmp_patch
 fi
-zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | grep "long mode automatically triggered"
+zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | $GREP "long mode automatically triggered"
 rm -rf tmp*
 
 println "\n===> patch-from very large dictionary and file test"

From 81f444f4f92b561278552c00d1d764caea1cff05 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 16 Jan 2024 12:14:35 -0800
Subject: [PATCH 148/283] made playTests.sh more compatible with older versions
 of grep

Replaced `\+` with `*` in the grep patterns.
`\+` matches one or more repetitions (`[1-N]`),
while `*` matches zero or more repetitions (`[0-N]`),
so the two are not strictly equivalent.
However, `\+` happens to be badly supported on some flavors of grep,
and for the purpose of these tests, `*` is good enough.
---
 tests/playTests.sh | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index 0e07ebb1904..bf5fba89b35 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
 
-set -e
+set -e # exit immediately on error
+# set -x # print commands before execution (debug)
 
 unset ZSTD_CLEVEL
 unset ZSTD_NBTHREADS
@@ -292,9 +293,9 @@ println "test: --no-progress flag"
 zstd tmpro -c --no-progress | zstd -d -f -o "$INTOVOID" --no-progress
 zstd tmpro -cv --no-progress | zstd -dv -f -o "$INTOVOID" --no-progress
 println "test: --progress flag"
-zstd tmpro -c | zstd -d -f -o "$INTOVOID" --progress 2>&1 | $GREP "[A-Za-z0-9._ ]\+: [0-9]\+ bytes"
-zstd tmpro -c | zstd -d -f -q -o "$INTOVOID" --progress 2>&1 | $GREP "[A-Za-z0-9._ ]\+: [0-9]\+ bytes"
-zstd tmpro -c | zstd -d -f -v -o "$INTOVOID" 2>&1 | $GREP "[A-Za-z0-9._ ]\+: [0-9]\+ bytes"
+zstd tmpro -c | zstd -d -f -o "$INTOVOID" --progress 2>&1 | $GREP '[A-Za-z0-9._ ]*: [0-9]* bytes'
+zstd tmpro -c | zstd -d -f -q -o "$INTOVOID" --progress 2>&1 | $GREP '[A-Za-z0-9._ ]*: [0-9]* bytes'
+zstd tmpro -c | zstd -d -f -v -o "$INTOVOID" 2>&1 | $GREP '[A-Za-z0-9._ ]*: [0-9]* bytes'
 rm -f tmpro tmpro.zst
 println "test: overwrite input file (must fail)"
 zstd tmp -fo tmp && die "zstd compression overwrote the input file"
@@ -801,7 +802,7 @@ rm -rf tmp*
 println "test : show compression parameters in verbose mode"
 datagen > tmp
 zstd -vv tmp 2>&1 | \
-$GREP -q -- "--zstd=wlog=[0-9]\+,clog=[0-9]\+,hlog=[0-9]\+,slog=[0-9]\+,mml=[0-9]\+,tlen=[0-9]\+,strat=[0-9]\+"
+$GREP -q -- "--zstd=wlog=[0-9]*,clog=[0-9]*,hlog=[0-9]*,slog=[0-9]*,mml=[0-9]*,tlen=[0-9]*,strat=[0-9]*"
 rm -rf tmp*
 
 println "\n===>  Advanced compression parameters "

From ee2efb634eab104a2ec18ab6b2ce277bc159cbd0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Jan 2024 05:21:32 +0000
Subject: [PATCH 149/283] Bump actions/upload-artifact from 4.1.0 to 4.2.0

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.1.0 to 4.2.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/1eb3cb2b3e0f29609092a73eb033bb759a334595...694cdabd8bdb0f10b2cea11669e1bf5453eed0a6)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/dev-long-tests.yml    | 2 +-
 .github/workflows/scorecards.yml        | 2 +-
 .github/workflows/windows-artifacts.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index 2f3dd6a27ef..3fe5eb2f664 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -290,7 +290,7 @@ jobs:
         dry-run: false
         sanitizer: ${{ matrix.sanitizer }}
     - name: Upload Crash
-      uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # tag=v4.1.0
+      uses: actions/upload-artifact@694cdabd8bdb0f10b2cea11669e1bf5453eed0a6 # tag=v4.2.0
       if: failure() && steps.build.outcome == 'success'
       with:
         name: ${{ matrix.sanitizer }}-artifacts
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 5b5175489e8..8d7d71b5f2a 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -51,7 +51,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # tag=v4.1.0
+        uses: actions/upload-artifact@694cdabd8bdb0f10b2cea11669e1bf5453eed0a6 # tag=v4.2.0
         with:
           name: SARIF file
           path: results.sarif
diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml
index 10bef46dbfc..e0f3b713419 100644
--- a/.github/workflows/windows-artifacts.yml
+++ b/.github/workflows/windows-artifacts.yml
@@ -52,7 +52,7 @@ jobs:
         cd ..
 
     - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip
-      uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # tag=v4.1.0
+      uses: actions/upload-artifact@694cdabd8bdb0f10b2cea11669e1bf5453eed0a6 # tag=v4.2.0
       with:
         path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
         name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip

From c7611d6964d7012c24850c3a2cd3092f50f9d6ba Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 27 Jan 2024 15:37:29 -0800
Subject: [PATCH 150/283] disable Intel CET Compatibility tests

The binary blob that must be downloaded from intel.com is no longer available
---
 .github/workflows/dev-short-tests.yml | 45 ++++++++++++++-------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 65e9ae655f9..911ba60cc2e 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -524,25 +524,6 @@ jobs:
         make -C tests fuzzer &&
         ./tests/fuzzer.exe -v -T1m
 
-  intel-cet-compatibility:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
-    - name: Build Zstd
-      run: |
-        make -j zstd V=1
-        readelf -n zstd
-    - name: Get Intel SDE
-      run: |
-        curl -LO https://downloadmirror.intel.com/684899/sde-external-9.0.0-2021-11-07-lin.tar.xz
-        tar xJvf sde-external-9.0.0-2021-11-07-lin.tar.xz
-    - name: Configure Permissions
-      run: |
-        echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-    - name: Run Under SDE
-      run: |
-        sde-external-9.0.0-2021-11-07-lin/sde -cet -cet-raise 0 -cet-endbr-exe -cet-stderr -cet-abort -- ./zstd -b3
-
   pkg-config:
     runs-on: ubuntu-latest
     container:
@@ -600,9 +581,9 @@ jobs:
         ./programs/zstd -b
 
 
-# For reference : icc tests
+# Failing tests, for reference
+
 # icc tests are currently failing on Github Actions, likely to issues during installation stage
-# To be fixed later
 #
 #  icc:
 #    name: icc-check
@@ -622,3 +603,25 @@ jobs:
 #    - name: make check
 #      run: |
 #        make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check
+
+
+# intel-cet-compatibility tests are currently failing, during download stage
+#
+#  intel-cet-compatibility:
+#    runs-on: ubuntu-latest
+#    steps:
+#    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+#    - name: Build Zstd
+#      run: |
+#        make -j zstd V=1
+#        readelf -n zstd
+#    - name: Get Intel SDE
+#      run: |
+#        curl -LO https://downloadmirror.intel.com/684899/sde-external-9.0.0-2021-11-07-lin.tar.xz
+#        tar xJvf sde-external-9.0.0-2021-11-07-lin.tar.xz
+#    - name: Configure Permissions
+#      run: |
+#        echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+#    - name: Run Under SDE
+#      run: |
+#        sde-external-9.0.0-2021-11-07-lin/sde -cet -cet-raise 0 -cet-endbr-exe -cet-stderr -cet-abort -- ./zstd -b3

From 2fc7248412db6c92086369fc3243f93f397cff4c Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 27 Jan 2024 15:09:21 -0800
Subject: [PATCH 151/283] fix cmakebuild test

write it in a way which is more compatible with older versions of cmake (<3.13)

Also:
fix pzstd compilation (notably on macos)
---
 .github/workflows/dev-short-tests.yml    |  9 +++----
 .gitignore                               |  2 ++
 Makefile                                 | 32 +++++++++++++-----------
 build/cmake/contrib/pzstd/CMakeLists.txt |  1 +
 4 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 911ba60cc2e..593f37879ab 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -80,19 +80,16 @@ jobs:
         make clean
         LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt
 
-    # candidate test (to check) : underlink test
+    # candidate test (for discussion) : underlink test
     # LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
 
   cmake-build-and-test-check:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
-    - name: cmake build and test check
+    - name: cmake build and test
       run: |
-        FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
-        cp -r ./ "../zstd source"
-        cd "../zstd source"
-        FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
+        FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild V=1
 
   cpp-gnu90-c99-compatibility:
     runs-on: ubuntu-latest
diff --git a/.gitignore b/.gitignore
index e95a8d07cd8..048a4c4e492 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,8 @@ tmp*
 dictionary.
 dictionary
 NUL
+cmakebuild/
+install/
 
 # Build artefacts
 contrib/linux-kernel/linux/
diff --git a/Makefile b/Makefile
index 7bc7ec5bf91..87d80d16dea 100644
--- a/Makefile
+++ b/Makefile
@@ -145,7 +145,7 @@ clean:
 	$(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
 	$(Q)$(MAKE) -C contrib/externalSequenceProducer $@ > $(VOID)
 	$(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
-	$(Q)$(RM) -r lz4
+	$(Q)$(RM) -r lz4 cmakebuild install
 	@echo Cleaning completed
 
 #------------------------------------------------------------------------------
@@ -389,28 +389,32 @@ lz4install:
 endif
 
 
-CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release
-
 ifneq (,$(filter MSYS%,$(shell uname)))
 HOST_OS = MSYS
-CMAKE_PARAMS = -G"MSYS Makefiles" -DCMAKE_BUILD_TYPE=Debug -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
 endif
 
 #------------------------------------------------------------------------
 # target specific tests
 #------------------------------------------------------------------------
 ifneq (,$(filter $(HOST_OS),MSYS POSIX))
-.PHONY: cmakebuild c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
-cmakebuild:
-	cmake --version
-	$(RM) -r $(BUILDIR)/cmake/build
-	$(MKDIR) $(BUILDIR)/cmake/build
-	cd $(BUILDIR)/cmake/build; cmake -DCMAKE_INSTALL_PREFIX:PATH=~/install_test_dir $(CMAKE_PARAMS) ..
-	$(MAKE) -C $(BUILDIR)/cmake/build -j4;
-	$(MAKE) -C $(BUILDIR)/cmake/build install;
-	$(MAKE) -C $(BUILDIR)/cmake/build uninstall;
-	cd $(BUILDIR)/cmake/build; ctest -V -L Medium
 
+CMAKE ?= cmake
+CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON
+
+ifneq (,$(filter MSYS%,$(shell uname)))
+CMAKE_PARAMS = -G"MSYS Makefiles" -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
+endif
+
+.PHONY: cmakebuild
+cmakebuild:
+	$(CMAKE) --version
+	$(RM) -r cmakebuild install
+	$(MKDIR) cmakebuild install
+	cd cmakebuild; $(CMAKE) -Wdev -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Werror -O0" -DCMAKE_INSTALL_PREFIX=install $(CMAKE_PARAMS) ../build/cmake
+	$(CMAKE) --build cmakebuild --target install -- -j V=1
+	cd cmakebuild; ctest -V -L Medium
+
+.PHONY: c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
 c89build: clean
 	$(CC) -v
 	CFLAGS="-std=c89 -Werror -Wno-attributes -Wpedantic -Wno-long-long -Wno-variadic-macros -O0" $(MAKE) lib zstd
diff --git a/build/cmake/contrib/pzstd/CMakeLists.txt b/build/cmake/contrib/pzstd/CMakeLists.txt
index f7098fa0f7f..e1c8e0672fe 100644
--- a/build/cmake/contrib/pzstd/CMakeLists.txt
+++ b/build/cmake/contrib/pzstd/CMakeLists.txt
@@ -18,6 +18,7 @@ set(PZSTD_DIR ${ZSTD_SOURCE_DIR}/contrib/pzstd)
 include_directories(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${PZSTD_DIR})
 
 add_executable(pzstd ${PROGRAMS_DIR}/util.c ${PZSTD_DIR}/main.cpp ${PZSTD_DIR}/Options.cpp ${PZSTD_DIR}/Pzstd.cpp ${PZSTD_DIR}/SkippableFrame.cpp)
+target_compile_features(pzstd PRIVATE cxx_std_11)
 set_property(TARGET pzstd APPEND PROPERTY COMPILE_DEFINITIONS "NDEBUG")
 set_property(TARGET pzstd APPEND PROPERTY COMPILE_OPTIONS "-Wno-shadow")
 

From e1ef81a3ae94dad4aa846615fc6e2293b28f50e8 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sun, 28 Jan 2024 20:25:04 -0800
Subject: [PATCH 152/283] add sparc64 compilation test

---
 .github/workflows/dev-short-tests.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 593f37879ab..ace63b380ec 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -381,6 +381,7 @@ jobs:
           { name: S390X,    xcc_pkg: gcc-s390x-linux-gnu,       xcc: s390x-linux-gnu-gcc,       xemu_pkg: qemu-system-s390x,  xemu: qemu-s390x-static   },
           { name: MIPS,     xcc_pkg: gcc-mips-linux-gnu,        xcc: mips-linux-gnu-gcc,        xemu_pkg: qemu-system-mips,   xemu: qemu-mips-static    },
           { name: M68K,     xcc_pkg: gcc-m68k-linux-gnu,        xcc: m68k-linux-gnu-gcc,        xemu_pkg: qemu-system-m68k,   xemu: qemu-m68k-static    },
+          { name: SPARC,    xcc_pkg: gcc-sparc64-linux-gnu,     xcc: sparc64-linux-gnu-gcc,     xemu_pkg: qemu-system-sparc,  xemu: qemu-sparc64-static },
         ]
     env:                        # Set environment variables
       XCC: ${{ matrix.xcc }}
@@ -427,6 +428,10 @@ jobs:
       if: ${{ matrix.name == 'M68K' }}
       run: |
         LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
+    - name: SPARC
+      if: ${{ matrix.name == 'SPARC' }}
+      run: |
+        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
 
   mingw-short-test:
     runs-on: windows-latest

From 163e9b66377126e1b498c40628660d59aababf9f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 29 Jan 2024 05:53:39 +0000
Subject: [PATCH 153/283] Bump actions/upload-artifact from 4.2.0 to 4.3.0

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.2.0 to 4.3.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/694cdabd8bdb0f10b2cea11669e1bf5453eed0a6...26f96dfa697d77e81fd5907df203aa23a56210a8)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/dev-long-tests.yml    | 2 +-
 .github/workflows/scorecards.yml        | 2 +-
 .github/workflows/windows-artifacts.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index 3fe5eb2f664..29db7316ea3 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -290,7 +290,7 @@ jobs:
         dry-run: false
         sanitizer: ${{ matrix.sanitizer }}
     - name: Upload Crash
-      uses: actions/upload-artifact@694cdabd8bdb0f10b2cea11669e1bf5453eed0a6 # tag=v4.2.0
+      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # tag=v4.3.0
       if: failure() && steps.build.outcome == 'success'
       with:
         name: ${{ matrix.sanitizer }}-artifacts
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 8d7d71b5f2a..770f9d4a382 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -51,7 +51,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@694cdabd8bdb0f10b2cea11669e1bf5453eed0a6 # tag=v4.2.0
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # tag=v4.3.0
         with:
           name: SARIF file
           path: results.sarif
diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml
index e0f3b713419..b689bf560a7 100644
--- a/.github/workflows/windows-artifacts.yml
+++ b/.github/workflows/windows-artifacts.yml
@@ -52,7 +52,7 @@ jobs:
         cd ..
 
     - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip
-      uses: actions/upload-artifact@694cdabd8bdb0f10b2cea11669e1bf5453eed0a6 # tag=v4.2.0
+      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # tag=v4.3.0
       with:
         path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
         name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip

From c485b57bc73abfda9085650b4caaf013248e42dc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 29 Jan 2024 05:53:45 +0000
Subject: [PATCH 154/283] Bump microsoft/setup-msbuild from 1.3.1 to 1.3.2

Bumps [microsoft/setup-msbuild](https://github.com/microsoft/setup-msbuild) from 1.3.1 to 1.3.2.
- [Release notes](https://github.com/microsoft/setup-msbuild/releases)
- [Changelog](https://github.com/microsoft/setup-msbuild/blob/main/building-release.md)
- [Commits](https://github.com/microsoft/setup-msbuild/compare/1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c...031090342aeefe171e49f3820f3b52110c66e402)

---
updated-dependencies:
- dependency-name: microsoft/setup-msbuild
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/dev-short-tests.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 593f37879ab..b834ce2d745 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -271,7 +271,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # tag=v1.3
+      uses: microsoft/setup-msbuild@031090342aeefe171e49f3820f3b52110c66e402 # tag=v1.3
     - name: Build
       working-directory: ${{env.GITHUB_WORKSPACE}}
       run: |
@@ -298,7 +298,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # tag=v1.3
+      uses: microsoft/setup-msbuild@031090342aeefe171e49f3820f3b52110c66e402 # tag=v1.3
     - name: Build ${{matrix.name}}
       working-directory: ${{env.GITHUB_WORKSPACE}}
       # See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
@@ -480,7 +480,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # tag=v1.3
+      uses: microsoft/setup-msbuild@031090342aeefe171e49f3820f3b52110c66e402 # tag=v1.3
     - name: Build and run tests
       working-directory: ${{env.GITHUB_WORKSPACE}}
       env:

From d0b7da30e26406c7ece2bf538a70410e80b9de9f Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 29 Jan 2024 15:00:32 -0800
Subject: [PATCH 155/283] add a lorem ipsum generator

this generator replaces the statistical generator
for the general case when no statistic is requested.

Generated data features a compression level speed / ratio curve
which is more in line with expectation.
---
 programs/benchzstd.c |  17 ++--
 programs/benchzstd.h |  11 +--
 programs/lorem.c     | 207 +++++++++++++++++++++++++++++++++++++++++++
 programs/lorem.h     |  32 +++++++
 programs/zstdcli.c   |   4 +-
 5 files changed, 259 insertions(+), 12 deletions(-)
 create mode 100644 programs/lorem.c
 create mode 100644 programs/lorem.h

diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 9bc3628ee5f..b3af4c3322f 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -32,12 +32,13 @@
 #include "benchfn.h"
 #include "../lib/common/mem.h"
 #ifndef ZSTD_STATIC_LINKING_ONLY
-#define ZSTD_STATIC_LINKING_ONLY
+# define ZSTD_STATIC_LINKING_ONLY
 #endif
 #include "../lib/zstd.h"
 #include "datagen.h"     /* RDG_genBuffer */
+#include "lorem.h"       /* LOREM_genBuffer */
 #ifndef XXH_INLINE_ALL
-#define XXH_INLINE_ALL
+# define XXH_INLINE_ALL
 #endif
 #include "../lib/common/xxhash.h"
 #include "benchzstd.h"
@@ -701,7 +702,8 @@ int BMK_syntheticTest(int cLevel, double compressibility,
                       const ZSTD_compressionParameters* compressionParams,
                       int displayLevel, const BMK_advancedParams_t* adv)
 {
-    char name[20] = {0};
+    char nameBuff[20] = {0};
+    const char* name = nameBuff;
     size_t const benchedSize = 10000000;
     void* srcBuffer;
     BMK_benchOutcome_t res;
@@ -719,10 +721,15 @@ int BMK_syntheticTest(int cLevel, double compressibility,
     }
 
     /* Fill input buffer */
-    RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
+    if (compressibility < 0.0) {
+        LOREM_genBuffer(srcBuffer, benchedSize, 0);
+        name = "Lorem ipsum";
+    } else {
+        RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
+        snprintf (nameBuff, sizeof(nameBuff), "Synthetic %2u%%", (unsigned)(compressibility*100));
+    }
 
     /* Bench */
-    snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
     res = BMK_benchCLevel(srcBuffer, benchedSize,
                     &benchedSize /* ? */, 1 /* ? */,
                     cLevel, compressionParams,
diff --git a/programs/benchzstd.h b/programs/benchzstd.h
index f14a681925e..cdb6101c201 100644
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -126,11 +126,12 @@ int BMK_benchFilesAdvanced(
 
 /*! BMK_syntheticTest() -- called from zstdcli */
 /*  Generates a sample with datagen, using compressibility argument */
-/*  cLevel - compression level to benchmark, errors if invalid
- *  compressibility - determines compressibility of sample
- *  compressionParams - basic compression Parameters
- *  displayLevel - see benchFiles
- *  adv - see advanced_Params_t
+/* @cLevel - compression level to benchmark, errors if invalid
+ * @compressibility - determines compressibility of sample, range [0.0 - 1.0]
+ *        if @compressibility < 0.0, uses the lorem ipsum generator
+ * @compressionParams - basic compression Parameters
+ * @displayLevel - see benchFiles
+ * @adv - see advanced_Params_t
  * @return: 0 on success, !0 on error
  */
 int BMK_syntheticTest(int cLevel, double compressibility,
diff --git a/programs/lorem.c b/programs/lorem.c
new file mode 100644
index 00000000000..59dd6da629f
--- /dev/null
+++ b/programs/lorem.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* Implementation notes:
+ *
+ * This is a very simple lorem ipsum generator
+ * which features a static list of words
+ * and prints them one after another randomly
+ * with a fake sentence / paragraph structure.
+ *
+ * The goal is to generate a printable text
+ * that can be used to fake a text compression scenario.
+ * The resulting compression / ratio curve of the lorem ipsum generator
+ * is more satisfying than the previous statistical generator,
+ * which was initially designed for entropy compression,
+ * and lacks a regularity more representative of text.
+ *
+ * The compression ratio achievable on the generated lorem ipsum
+ * is still a bit too good, presumably because the dictionary is too small.
+ * It would be possible to create some more complex scheme,
+ * notably by enlarging the dictionary with a word generator,
+ * and adding grammatical rules (composition) and syntax rules.
+ * But that's probably overkill for the intended goal.
+ */
+
+#include "lorem.h"
+#include <string.h>  /* memcpy */
+#include <limits.h>  /* INT_MAX */
+#include <assert.h>
+
+#define WORD_MAX_SIZE 20
+
+/* Word pool: 75 lowercase words; entries 0..18 also form the fixed first sentence */
+static const char *words[] = {
+    "lorem",       "ipsum",      "dolor",      "sit",          "amet",
+    "consectetur", "adipiscing", "elit",       "sed",          "do",
+    "eiusmod",     "tempor",     "incididunt", "ut",           "labore",
+    "et",          "dolore",     "magna",      "aliqua",       "dis",
+    "lectus",      "vestibulum", "mattis",     "ullamcorper",  "velit",
+    "commodo",     "a",          "lacus",      "arcu",         "magnis",
+    "parturient",  "montes",     "nascetur",   "ridiculus",    "mus",
+    "mauris",      "nulla",      "malesuada",  "pellentesque", "eget",
+    "gravida",     "in",         "dictum",     "non",          "erat",
+    "nam",         "voluptat",   "maecenas",   "blandit",      "aliquam",
+    "etiam",       "enim",       "lobortis",   "scelerisque",  "fermentum",
+    "dui",         "faucibus",   "ornare",     "at",           "elementum",
+    "eu",          "facilisis",  "odio",       "morbi",        "quis",
+    "eros",        "donec",      "ac",         "orci",         "purus",
+    "turpis",      "cursus",     "leo",        "vel",          "porta"};
+
+/* Table of indices into words[], giving a simple distribution that favors
+ * small words: an index listed k times has weight k in a uniform pick.
+ * 1 letter : weight 3 ; 2-3 letters : weight 2 ; 4+ letters : weight 1
+ * generateSentence() draws a uniform position in this table for each word.
+ * This is expected to be a bit more difficult to compress */
+static const int distrib[] = {
+    0, 1, 2, 3, 3, 4, 5, 6, 7, 8,
+    8,9, 9, 10, 11, 12, 13, 13, 14, 15,
+    15, 16, 17, 18, 19, 19, 20, 21, 22, 23,
+    24, 25, 26, 26, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 34, 35, 36, 37, 38, 39, 40,
+    41, 41, 42, 43, 43, 44, 45, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 55, 56,
+    57, 58, 58, 59, 60, 60, 61, 62, 63, 64,
+    65, 66, 67, 67, 68, 69, 70, 71, 72, 72,
+    73, 73, 74 };
+static const unsigned distribCount = sizeof(distrib) / sizeof(distrib[0]);  /* nb of entries in distrib[] */
+
+/* Note: this unit only works when invoked sequentially.
+ * No concurrent access is allowed, since all generator state lives in these globals */
+static char *g_ptr = NULL;            /* output buffer during a LOREM_genBlock() call, reset to NULL on exit */
+static size_t g_nbChars = 0;          /* nb of bytes written into g_ptr so far */
+static size_t g_maxChars = 10000000;  /* capacity of g_ptr; overwritten by each LOREM_genBlock() call */
+static unsigned g_randRoot = 0;       /* state of LOREM_rand(), set from @seed by LOREM_genBlock() */
+
+#define RDG_rotl32(x, r) ((x << r) | (x >> (32 - r)))  /* rotate left; arguments are not parenthesized: pass simple expressions only */
+static unsigned LOREM_rand(unsigned range) {  /* advances g_randRoot, returns a pseudo-random value scaled by @range */
+  static const unsigned prime1 = 2654435761U;
+  static const unsigned prime2 = 2246822519U;
+  unsigned rand32 = g_randRoot;
+  rand32 *= prime1;
+  rand32 ^= prime2;
+  rand32 = RDG_rotl32(rand32, 13);
+  g_randRoot = rand32;  /* new state for the next call */
+  return (unsigned)(((unsigned long long)rand32 * range) >> 32);  /* multiply-shift: result < @range, assuming unsigned is 32-bit */
+}
+
+static void writeLastCharacters(void) {  /* pads the free tail of the buffer with '.', spaces, then a final '\n' */
+  size_t lastChars = g_maxChars - g_nbChars;  /* nb of bytes still free */
+  assert(g_maxChars >= g_nbChars);
+  if (lastChars == 0)
+    return;  /* buffer already full */
+  g_ptr[g_nbChars++] = '.';
+  if (lastChars > 2) {
+    memset(g_ptr + g_nbChars, ' ', lastChars - 2);  /* everything between the '.' and the last byte */
+  }
+  if (lastChars > 1) {
+    g_ptr[g_maxChars-1] = '\n';
+  }
+  g_nbChars = g_maxChars;  /* mark the buffer as full */
+}
+
+static void generateWord(const char *word, const char *separator, int upCase)
+{   /* Appends @word then @separator to the buffer, or pads the buffer tail when both do not fit */
+    size_t const len = strlen(word) + strlen(separator);
+    if (g_nbChars + len > g_maxChars) {
+        writeLastCharacters();
+        return;  /* the word is dropped, not truncated */
+    }
+    memcpy(g_ptr + g_nbChars, word, strlen(word));
+    if (upCase) {  /* capitalize the first letter; callers in this file pass lowercase words */
+        static const char toUp = 'A' - 'a';
+        g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);
+    }
+    g_nbChars += strlen(word);
+    memcpy(g_ptr + g_nbChars, separator, strlen(separator));
+    g_nbChars += strlen(separator);
+}
+
+static int about(unsigned target) {  /* random value around @target: two LOREM_rand(target) draws summed, plus 1 */
+  return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);
+}
+
+/* Generate a sentence of @nbWords random words, with up to 2 commas, ending with ". " */
+static void generateSentence(int nbWords) {
+  int commaPos = about(9);
+  int comma2 = commaPos + about(7);  /* always after commaPos, since about() returns at least 1 */
+  int i;
+  for (i = 0; i < nbWords; i++) {
+    int const wordID = distrib[LOREM_rand(distribCount)];  /* weighted pick, see distrib[] */
+    const char *const word = words[wordID];
+    const char* sep = " ";
+    if (i == commaPos)
+      sep = ", ";
+    if (i == comma2)
+      sep = ", ";
+    if (i == nbWords - 1)
+      sep = ". ";  /* end of sentence takes precedence over a comma */
+    generateWord(word, sep, i==0);  /* only the first word is capitalized */
+  }
+}
+
+static void generateParagraph(int nbSentences) {  /* @nbSentences sentences, followed by up to 2 '\n' if space remains */
+  int i;
+  for (i = 0; i < nbSentences; i++) {
+    int wordsPerSentence = about(8);
+    generateSentence(wordsPerSentence);
+  }
+  if (g_nbChars < g_maxChars) {
+    g_ptr[g_nbChars++] = '\n';  /* end of last line */
+  }
+  if (g_nbChars < g_maxChars) {
+    g_ptr[g_nbChars++] = '\n';  /* empty line between paragraphs */
+  }
+}
+
+/* It's "common" for lorem ipsum generators to start with the same first
+ * pre-defined sentence: here, words[0] to words[18] in order */
+static void generateFirstSentence(void) {
+  int i;
+  for (i = 0; i < 18; i++) {
+    const char *word = words[i];
+    const char *separator = " ";
+    if (i == 4)
+      separator = ", ";  /* comma after "amet" */
+    if (i == 7)
+      separator = ", ";  /* comma after "elit" */
+    generateWord(word, separator, i==0);
+  }
+  generateWord(words[18], ". ", 0);  /* "aliqua" closes the sentence */
+}
+
+size_t LOREM_genBlock(void* buffer, size_t size,
+                      unsigned seed,
+                      int first, int fill)
+{ /* Writes lorem ipsum text into @buffer, at most @size bytes; returns the nb of bytes written */
+  g_ptr = (char*)buffer;
+  assert(size < INT_MAX);
+  g_maxChars = size;
+  g_nbChars = 0;
+  g_randRoot = seed;  /* all random draws derive from @seed */
+  if (first) {
+    generateFirstSentence();
+  }
+  while (g_nbChars < g_maxChars) {
+    int sentencePerParagraph = about(7);
+    generateParagraph(sentencePerParagraph);
+    if (!fill)
+      break; /* only generate one paragraph in not-fill mode */
+  }
+  g_ptr = NULL;  /* @buffer is not retained after the call */
+  return g_nbChars;
+}
+
+void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
+{
+  LOREM_genBlock(buffer, size, seed, 1, 1);  /* first=1: start with the fixed sentence; fill=1: fill the whole buffer */
+}
+
diff --git a/programs/lorem.h b/programs/lorem.h
new file mode 100644
index 00000000000..4a87f8748a5
--- /dev/null
+++ b/programs/lorem.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* lorem ipsum generator */
+
+#include <stddef.h>   /* size_t */
+
+/*
+ * LOREM_genBuffer():
+ * Generate @size bytes of compressible data using lorem ipsum generator
+ * into provided @buffer.
+ */
+void LOREM_genBuffer(void* buffer, size_t size, unsigned seed);
+
+/*
+ * LOREM_genBlock():
+ * Similar to LOREM_genBuffer, with additional controls :
+ * - @first : generate the first sentence
+ * - @fill : fill the entire @buffer,
+ *           if ==0: generate one paragraph at most.
+ * @return : nb of bytes generated into @buffer.
+ */
+size_t LOREM_genBlock(void* buffer, size_t size,
+                      unsigned seed,
+                      int first, int fill);
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 3f0ae8bdd21..dd21021b065 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -856,7 +856,7 @@ int main(int argCount, const char* argv[])
     ZSTD_paramSwitch_e useRowMatchFinder = ZSTD_ps_auto;
     FIO_compressionType_t cType = FIO_zstdCompression;
     unsigned nbWorkers = 0;
-    double compressibility = 0.5;
+    double compressibility = -1.0;  /* lorem ipsum generator */
     unsigned bench_nbSeconds = 3;   /* would be better if this value was synchronized from bench */
     size_t blockSize = 0;
 
@@ -1280,7 +1280,7 @@ int main(int argCount, const char* argv[])
                     break;
 
                     /* unknown command */
-                default : 
+                default :
                     sprintf(shortArgument, "-%c", argument[0]);
                     badUsage(programName, shortArgument);
                     CLEAN_RETURN(1);

From a261375996c2301267ef6b00643e6efe92043d8a Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 29 Jan 2024 15:18:27 -0800
Subject: [PATCH 156/283] fix paramgrill Makefile recipe

---
 tests/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/Makefile b/tests/Makefile
index c31e7500558..6bf8b4a085d 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -203,7 +203,7 @@ zstreamtest-dll : $(ZSTREAM_LOCAL_FILES)
 CLEAN += paramgrill
 paramgrill : DEBUGFLAGS =   # turn off debug for speed measurements
 paramgrill : LDLIBS += -lm
-paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c
+paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c $(PRGDIR)/lorem.c paramgrill.c
 
 CLEAN += datagen
 datagen : $(PRGDIR)/datagen.c datagencli.c

From 3ce4c6e046ed29ba4c0cb05fb48581edc52c1d3c Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 29 Jan 2024 15:24:42 -0800
Subject: [PATCH 157/283] fix Visual Studio solutions

note: we probably don't want to maintain the VS2008 solution anymore.
Its successor, VS2010, is more than 10 years old,
which is more or less the limit after which we can stop supporting old compilers.
---
 build/VS2008/zstd/zstd.vcproj  | 4 ++++
 build/VS2010/zstd/zstd.vcxproj | 1 +
 2 files changed, 5 insertions(+)

diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj
index 91f2bda536c..de1501d2056 100644
--- a/build/VS2008/zstd/zstd.vcproj
+++ b/build/VS2008/zstd/zstd.vcproj
@@ -356,6 +356,10 @@
 				RelativePath="..\..\..\programs\dibio.c"
 				>
 			
+			
+			
 			
diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj
index 5e1bced6fd4..5a5237f0e48 100644
--- a/build/VS2010/zstd/zstd.vcxproj
+++ b/build/VS2010/zstd/zstd.vcxproj
@@ -63,6 +63,7 @@
     
     
     
+    
     
     
   

From befcec17886479a22028b1d0b632fa15e31d5abc Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 29 Jan 2024 15:45:16 -0800
Subject: [PATCH 158/283] fix cmake recipe

---
 build/cmake/programs/CMakeLists.txt | 7 +------
 build/cmake/tests/CMakeLists.txt    | 4 ++--
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt
index 6a816586623..5e239e32a3d 100644
--- a/build/cmake/programs/CMakeLists.txt
+++ b/build/cmake/programs/CMakeLists.txt
@@ -32,12 +32,7 @@ if (MSVC)
     set(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc)
 endif ()
 
-set(ZSTD_PROGRAM_SRCS ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c
-    ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c
-    ${PROGRAMS_DIR}/fileio_asyncio.c ${PROGRAMS_DIR}/benchfn.c
-    ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c
-    ${PROGRAMS_DIR}/dibio.c ${PROGRAMS_DIR}/zstdcli_trace.c
-    ${PlatformDependResources})
+file(GLOB ZSTD_PROGRAM_SRCS "${PROGRAMS_DIR}/*.c")
 if (MSVC AND ZSTD_PROGRAMS_LINK_SHARED)
     list(APPEND ZSTD_PROGRAM_SRCS ${LIBRARY_DIR}/common/pool.c ${LIBRARY_DIR}/common/threading.c)
 endif ()
diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt
index 250f0508f37..3226374a507 100644
--- a/build/cmake/tests/CMakeLists.txt
+++ b/build/cmake/tests/CMakeLists.txt
@@ -56,7 +56,7 @@ target_link_libraries(datagen libzstd_static)
 #
 # fullbench
 #
-add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
+add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/lorem.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
 if (NOT MSVC)
     target_compile_options(fullbench PRIVATE "-Wno-deprecated-declarations")
 endif()
@@ -110,7 +110,7 @@ endif()
 # Label the "Medium" set of tests (see TESTING.md)
 set_property(TEST fuzzer zstreamtest playTests APPEND PROPERTY LABELS Medium)
 
-add_executable(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/paramgrill.c)
+add_executable(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/lorem.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/paramgrill.c)
 if (UNIX)
     target_link_libraries(paramgrill libzstd_static m) #m is math library
 else()

From fd03971252d043bb9d3e065dc2361db6d40c87b6 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 29 Jan 2024 15:50:21 -0800
Subject: [PATCH 159/283] blindfix meson recipe

note: absence of GLOB capability within meson makes its maintenance more painful.
---
 build/meson/programs/meson.build | 1 +
 build/meson/tests/meson.build    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/build/meson/programs/meson.build b/build/meson/programs/meson.build
index 0b5a9305604..e103a629539 100644
--- a/build/meson/programs/meson.build
+++ b/build/meson/programs/meson.build
@@ -18,6 +18,7 @@ zstd_programs_sources = [join_paths(zstd_rootdir, 'programs/zstdcli.c'),
   join_paths(zstd_rootdir, 'programs/benchfn.c'),
   join_paths(zstd_rootdir, 'programs/benchzstd.c'),
   join_paths(zstd_rootdir, 'programs/datagen.c'),
+  join_paths(zstd_rootdir, 'programs/lorem.c'),
   join_paths(zstd_rootdir, 'programs/dibio.c'),
   join_paths(zstd_rootdir, 'programs/zstdcli_trace.c')]
 
diff --git a/build/meson/tests/meson.build b/build/meson/tests/meson.build
index 2dd8d1067e0..03480d20542 100644
--- a/build/meson/tests/meson.build
+++ b/build/meson/tests/meson.build
@@ -29,6 +29,7 @@ DECODECORPUS_TESTTIME = '-T30'
 test_includes = [ include_directories(join_paths(zstd_rootdir, 'programs')) ]
 
 testcommon_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
+  join_paths(zstd_rootdir, 'programs/lorem.c'),
   join_paths(zstd_rootdir, 'programs/util.c'),
   join_paths(zstd_rootdir, 'programs/timefn.c'),
   join_paths(zstd_rootdir, 'programs/benchfn.c'),

From de10f56be2765e8375939b97bb27ad3e378f217f Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 29 Jan 2024 23:25:24 -0800
Subject: [PATCH 160/283] improve high compression ratio for file like #3793

This works great for 32-bit arrays,
notably the synthetic ones with extreme regularity.
Unfortunately, it's not universal,
and in some cases, it's a loss.
Crucially, on average, it's a loss on silesia.
The most negatively impacted file is x-ray.
This deserves an investigation before suggesting it as an evolution.
---
 lib/compress/zstd_opt.c | 54 +++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 13 deletions(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 68537e60097..e16e5e4fb13 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -566,7 +566,7 @@ void ZSTD_updateTree_internal(
     const BYTE* const base = ms->window.base;
     U32 const target = (U32)(ip - base);
     U32 idx = ms->nextToUpdate;
-    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                 idx, target, dictMode);
 
     while(idx < target) {
@@ -1069,6 +1069,10 @@ listStats(const U32* table, int lastEltID)
 
 #endif
 
+#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
+#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
+#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
+
 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t
@@ -1122,7 +1126,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             U32 const ll0 = !litlen;
             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                              (U32)(ip-istart), (U32)(iend - ip));
+                                              (U32)(ip-istart), (U32)(iend-ip));
             if (!nbMatches) { ip++; continue; }
 
             /* initialize opt[0] */
@@ -1134,7 +1138,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
              * in every price. We include the literal length to avoid negative
              * prices when we subtract the previous literal length.
              */
-            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+            opt[0].price = LL_PRICE(litlen);
 
             /* large match -> immediate encoding */
             {   U32 const maxML = matches[nbMatches-1].len;
@@ -1155,11 +1159,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
             /* set prices for first matches starting position == 0 */
             assert(opt[0].price >= 0);
-            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+            {   U32 const literalsPrice = (U32)opt[0].price + (U32)LL_PRICE(0);
                 U32 pos;
                 U32 matchNb;
                 for (pos = 1; pos < minMatch; pos++) {
                     opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                    opt[pos].mlen = 0;
                 }
                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                     U32 const offBase = matches[matchNb].off;
@@ -1173,7 +1178,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                         opt[pos].off = offBase;
                         opt[pos].litlen = litlen;
                         opt[pos].price = (int)sequencePrice;
-                }   }
+                    }
+                    opt[pos].price = ZSTD_MAX_PRICE;
+                }
                 last_pos = pos-1;
             }
         }
@@ -1187,18 +1194,38 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             /* Fix current position with one literal if cheaper */
             {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                 int const price = opt[cur-1].price
-                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
-                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
-                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                                + LIT_PRICE(ip+cur-1)
+                                + LL_INCPRICE(litlen);
                 assert(price < 1000000000); /* overflow check */
                 if (price <= opt[cur].price) {
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                    opt[cur].mlen = 0;
-                    opt[cur].off = 0;
-                    opt[cur].litlen = litlen;
-                    opt[cur].price = price;
+                    if ( (optLevel == 2) /* additional check only for high modes */
+                      && (opt[cur].mlen > 0) /* interrupt a match */
+                      && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */
+                    {
+                        /* check next position, in case it would be cheaper */
+                        int with1literal = opt[cur].price + LL_INCPRICE(1);
+                        int withMoreLiterals = price + LL_INCPRICE(litlen+1);
+                        DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f",
+                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
+                        if (with1literal < withMoreLiterals) {
+                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) !!!", ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals));
+                            /* do not take this literal */
+                        } else {
+                            opt[cur].mlen = 0;
+                            opt[cur].off = 0;
+                            opt[cur].litlen = litlen;
+                            opt[cur].price = price;
+                        }
+                    } else {
+                        /* normal case: take the literal, it's expected to be cheaper */
+                        opt[cur].mlen = 0;
+                        opt[cur].off = 0;
+                        opt[cur].litlen = litlen;
+                        opt[cur].price = price;
+                    }
                 } else {
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
@@ -1236,7 +1263,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             {   U32 const ll0 = (opt[cur].mlen != 0);
                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                 U32 const previousPrice = (U32)opt[cur].price;
-                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 const basePrice = previousPrice + (U32)LL_PRICE(0);
                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
 
@@ -1291,6 +1318,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                         }
             }   }   }
+            opt[last_pos+1].price = ZSTD_MAX_PRICE;
         }  /* for (cur = 1; cur <= last_pos; cur++) */
 
         lastSequence = opt[last_pos];

From 4683667785c6248a20eba83dd192dc9baea70d84 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Wed, 31 Jan 2024 02:51:46 -0800
Subject: [PATCH 161/283] refactor optimal parser

store stretches as intermediate solution instead of sequences.
makes it possible to link a solution to a predecessor.
---
 lib/compress/zstd_compress.c           |   8 +-
 lib/compress/zstd_compress_internal.h  |  14 +-
 lib/compress/zstd_opt.c                | 196 +++++++++++++++----------
 lib/decompress/zstd_decompress_block.c |  17 ++-
 4 files changed, 138 insertions(+), 97 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index b4beff7a972..23c517d2be0 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1661,8 +1661,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
       + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
       + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
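       + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+2) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+2) * sizeof(ZSTD_optimal_t));
     size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)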
                                             ? ZSTD_cwksp_aligned_alloc_size(hSize)
                                             : 0;
@@ -2045,8 +2045,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
         ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
         ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
         ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
-        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
-        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+2) * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+2) * sizeof(ZSTD_optimal_t));
     }
 
     ms->cParams = *cParams;
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 60f22239e1a..ec34f2a7749 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -159,11 +159,11 @@ typedef struct {
 UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
 
 typedef struct {
-    int price;
-    U32 off;
-    U32 mlen;
-    U32 litlen;
-    U32 rep[ZSTD_REP_NUM];
+    int price;  /* price from beginning of segment to this position */
+    U32 off;    /* offset of previous match */
+    U32 mlen;   /* length of previous match */
+    U32 litlen; /* nb of literals after previous match */
+    U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
 } ZSTD_optimal_t;
 
 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
@@ -174,8 +174,8 @@ typedef struct {
     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
-    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
-    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+2 */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+2 */
 
     U32  litSum;                 /* nb of literals */
     U32  litLengthSum;           /* nb of litLength codes */
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index e16e5e4fb13..76f3ef05441 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1047,11 +1047,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
 *  Optimal parser
 *********************************/
 
-static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
-{
-    return sol.litlen + sol.mlen;
-}
-
 #if 0 /* debug */
 
 static void
@@ -1101,10 +1096,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
     ZSTD_match_t* const matches = optStatePtr->matchTable;
-    ZSTD_optimal_t lastSequence;
+    ZSTD_optimal_t lastStretch;
     ZSTD_optLdm_t optLdm;
 
-    ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
+    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
 
     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
@@ -1127,18 +1122,23 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                               (U32)(ip-istart), (U32)(iend-ip));
-            if (!nbMatches) { ip++; continue; }
+            if (!nbMatches) {
+                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
+                ip++;
+                continue;
+            }
 
             /* initialize opt[0] */
-            { U32 i ; for (i=0; i immediate encoding */
             {   U32 const maxML = matches[nbMatches-1].len;
@@ -1147,37 +1147,36 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                             nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
 
                 if (maxML > sufficient_len) {
-                    lastSequence.litlen = litlen;
-                    lastSequence.mlen = maxML;
-                    lastSequence.off = maxOffBase;
-                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                    lastStretch.litlen = 0;
+                    lastStretch.mlen = maxML;
+                    lastStretch.off = maxOffBase;
+                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
                                 maxML, sufficient_len);
                     cur = 0;
-                    last_pos = ZSTD_totalLen(lastSequence);
+                    last_pos = maxML;
                     goto _shortestPath;
             }   }
 
             /* set prices for first matches starting position == 0 */
             assert(opt[0].price >= 0);
-            {   U32 const literalsPrice = (U32)opt[0].price + (U32)LL_PRICE(0);
-                U32 pos;
+            {   U32 pos;
                 U32 matchNb;
                 for (pos = 1; pos < minMatch; pos++) {
-                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
-                    opt[pos].mlen = 0;
+                    opt[pos].price = ZSTD_MAX_PRICE;
+                    /* will be updated later on at match check */
                 }
                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                     U32 const offBase = matches[matchNb].off;
                     U32 const end = matches[matchNb].len;
                     for ( ; pos <= end ; pos++ ) {
-                        U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
-                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
+                        int const sequencePrice = opt[0].price + matchPrice;
                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
-                                    pos, ZSTD_fCost((int)sequencePrice));
+                                    pos, ZSTD_fCost(sequencePrice));
                         opt[pos].mlen = pos;
                         opt[pos].off = offBase;
-                        opt[pos].litlen = litlen;
-                        opt[pos].price = (int)sequencePrice;
+                        opt[pos].litlen = 0; /* end of match */
+                        opt[pos].price = sequencePrice + LL_PRICE(0);
                     }
                     opt[pos].price = ZSTD_MAX_PRICE;
                 }
@@ -1192,7 +1191,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
 
             /* Fix current position with one literal if cheaper */
-            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+            {   U32 const litlen = opt[cur-1].litlen + 1;
                 int const price = opt[cur-1].price
                                 + LIT_PRICE(ip+cur-1)
                                 + LL_INCPRICE(litlen);
@@ -1201,7 +1200,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                    if ( (optLevel == 2) /* additional check only for high modes */
+                    if ( 0 && (optLevel == 2) /* additional check only for high modes */
                       && (opt[cur].mlen > 0) /* interrupt a match */
                       && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */
                     {
@@ -1214,15 +1213,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                             DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) !!!", ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals));
                             /* do not take this literal */
                         } else {
-                            opt[cur].mlen = 0;
-                            opt[cur].off = 0;
+                            opt[cur].mlen = opt[cur-1].mlen;
+                            opt[cur].off = opt[cur-1].off;
                             opt[cur].litlen = litlen;
                             opt[cur].price = price;
                         }
                     } else {
-                        /* normal case: take the literal, it's expected to be cheaper */
-                        opt[cur].mlen = 0;
-                        opt[cur].off = 0;
+                        /* normal case: take the literal, it's expected to be cheaper at position @cur */
+                        opt[cur].mlen = opt[cur-1].mlen;
+                        opt[cur].off = opt[cur-1].off;
                         opt[cur].litlen = litlen;
                         opt[cur].price = price;
                     }
@@ -1240,9 +1239,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
              */
             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
             assert(cur >= opt[cur].mlen);
-            if (opt[cur].mlen != 0) {
+            if (opt[cur].litlen == 0) {
+                /* just finished a match => alter offset history */
                 U32 const prev = cur - opt[cur].mlen;
-                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
             } else {
                 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
@@ -1255,15 +1255,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
             if ( (optLevel==0) /*static_test*/
               && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
-                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
             }
 
             assert(opt[cur].price >= 0);
-            {   U32 const ll0 = (opt[cur].mlen != 0);
-                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
-                U32 const previousPrice = (U32)opt[cur].price;
-                U32 const basePrice = previousPrice + (U32)LL_PRICE(0);
+            {   U32 const ll0 = (opt[cur].litlen == 0);
+                int const previousPrice = opt[cur].price;
+                int const basePrice = previousPrice + LL_PRICE(0);
                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
 
@@ -1275,18 +1274,16 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     continue;
                 }
 
-                {   U32 const maxML = matches[nbMatches-1].len;
-                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
-                                inr-istart, cur, nbMatches, maxML);
-
-                    if ( (maxML > sufficient_len)
-                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
-                        lastSequence.mlen = maxML;
-                        lastSequence.off = matches[nbMatches-1].off;
-                        lastSequence.litlen = litlen;
-                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
-                        last_pos = cur + ZSTD_totalLen(lastSequence);
-                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
+                {   U32 const longestML = matches[nbMatches-1].len;
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
+                                inr-istart, cur, nbMatches, longestML);
+
+                    if ( (longestML > sufficient_len)
+                      || (cur + longestML >= ZSTD_OPT_NUM) ) {
+                        lastStretch.mlen = longestML;
+                        lastStretch.off = matches[nbMatches-1].off;
+                        lastStretch.litlen = 0;
+                        last_pos = cur + longestML;
                         goto _shortestPath;
                 }   }
 
@@ -1298,11 +1295,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     U32 mlen;
 
                     DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
-                                matchNb, matches[matchNb].off, lastML, litlen);
+                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
 
                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                         U32 const pos = cur + mlen;
-                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
 
                         if ((pos > last_pos) || (price < opt[pos].price)) {
                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1310,7 +1307,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                             while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
                             opt[pos].mlen = mlen;
                             opt[pos].off = offset;
-                            opt[pos].litlen = litlen;
+                            opt[pos].litlen = 0;
                             opt[pos].price = price;
                         } else {
                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
@@ -1321,41 +1318,77 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             opt[last_pos+1].price = ZSTD_MAX_PRICE;
         }  /* for (cur = 1; cur <= last_pos; cur++) */
 
-        lastSequence = opt[last_pos];
-        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
-        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
+        lastStretch = opt[last_pos];
+        assert(cur >= lastStretch.mlen);
+        cur = last_pos - lastStretch.mlen;
 
 _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
         assert(opt[0].mlen == 0);
+        assert(last_pos >= lastStretch.mlen);
+        assert(cur == last_pos - lastStretch.mlen);
+        assert(lastStretch.rep[0] != 0);
+
+        if (lastStretch.mlen==0) {
+            /* no solution : all matches have been converted into literals */
+            assert(lastStretch.litlen == (ip - anchor) + last_pos);
+            ip += last_pos;
+            continue;
+        }
+        assert(lastStretch.off > 0);
 
-        /* Set the next chunk's repcodes based on the repcodes of the beginning
-         * of the last match, and the last sequence. This avoids us having to
-         * update them while traversing the sequences.
-         */
-        if (lastSequence.mlen != 0) {
-            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
-            ZSTD_memcpy(rep, &reps, sizeof(reps));
+        /* Update offset history */
+        if (lastStretch.litlen == 0) {
+            /* finishing on a match : update offset history */
+            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
+            ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
         } else {
-            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+            ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
+            assert(cur >= lastStretch.litlen);
+            cur -= lastStretch.litlen;
         }
 
-        {   U32 const storeEnd = cur + 1;
+        /* let's write the shortest path solution
+         * solution is stored in @opt,
+         * in reverse order,
+         * starting from @storeEnd (==cur+1)
+         * (effectively partially overwriting @opt).
+         * Content is changed too:
+         * - So far, @opt stored stretches, aka a match followed by literals
+         * - Now, it will store sequences, aka literals followed by a match
+         */
+        {   U32 const storeEnd = cur + 2;
             U32 storeStart = storeEnd;
-            U32 seqPos = cur;
+            U32 stretchPos = cur;
+            ZSTD_optimal_t nextStretch;
 
             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                         last_pos, cur); (void)last_pos;
             assert(storeEnd < ZSTD_OPT_NUM);
-            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
-            opt[storeEnd] = lastSequence;
-            while (seqPos > 0) {
-                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
+            if (lastStretch.litlen > 0) {
+                /* last "sequence" is unfinished: just a bunch of literals */
+                opt[storeEnd].litlen = lastStretch.litlen;
+                opt[storeEnd].mlen = 0;
+                storeStart = storeEnd-1;
+                opt[storeStart] = lastStretch;
+            } {
+                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
+                storeStart = storeEnd;
+            }
+            while (1) {
+                nextStretch = opt[stretchPos];
+                opt[storeStart].litlen = nextStretch.litlen;
+                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
+                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
+                if (nextStretch.mlen == 0) {
+                    /* reaching beginning of segment */
+                    break;
+                }
                 storeStart--;
-                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
-                opt[storeStart] = opt[seqPos];
-                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
+                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
+                stretchPos -= nextStretch.litlen + nextStretch.mlen;
             }
 
             /* save sequences */
@@ -1381,6 +1414,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     anchor += advance;
                     ip = anchor;
             }   }
+            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
+
+            /* update all costs */
             ZSTD_setBasePrices(optStatePtr, optLevel);
         }
     }   /* while (ip < ilimit) */
@@ -1476,7 +1512,7 @@ size_t ZSTD_compressBlock_btultra2(
      * Consequently, this can only work if no data has been previously loaded in tables,
      * aka, no dictionary, no prefix, no ldm preprocessing.
      * The compression ratio gain is generally small (~0.5% on first block),
-    ** the cost is 2x cpu time on first block. */
+     * the cost is 2x cpu time on first block. */
     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
     if ( (ms->opt.litLengthSum==0)   /* first block */
       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index 11bf201bc36..4be145732d9 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -1585,7 +1585,8 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
     /* last literal segment */
     if (dctx->litBufferLocation == ZSTD_split) {
         /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
-        size_t const lastLLSize = litBufferEnd - litPtr;
+        size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
+        DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
         if (op != NULL) {
             ZSTD_memmove(op, litPtr, lastLLSize);
@@ -1596,14 +1597,16 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
         dctx->litBufferLocation = ZSTD_not_in_dst;
     }
     /* copy last literals from internal buffer */
-    {   size_t const lastLLSize = litBufferEnd - litPtr;
+    {   size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
+        DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
             ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
     }   }
 
-    return op-ostart;
+    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
+    return (size_t)(op - ostart);
 }
 
 FORCE_INLINE_TEMPLATE size_t
@@ -1673,14 +1676,16 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
     }
 
     /* last literal segment */
-    {   size_t const lastLLSize = litEnd - litPtr;
+    {   size_t const lastLLSize = (size_t)(litEnd - litPtr);
+        DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
             ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
     }   }
 
-    return op-ostart;
+    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
+    return (size_t)(op - ostart);
 }
 
 static size_t
@@ -1878,7 +1883,7 @@ ZSTD_decompressSequencesLong_body(
         }
     }
 
-    return op-ostart;
+    return (size_t)(op - ostart);
 }
 
 static size_t

From 0166b2ba8083481df3ae68e3431a43f541d3c9bd Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Wed, 31 Jan 2024 11:12:57 -0800
Subject: [PATCH 162/283] modification: differentiate literal update at pos+1

helps when litlen==1 is cheaper than litlen==0

works great on pathological arr[u32] examples
but doesn't generalize well on other files.

silesia/x-ray is among the most negatively affected ones.
---
 lib/compress/zstd_opt.c | 52 +++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 76f3ef05441..d7fb191cc9d 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1200,8 +1200,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                    if ( 0 && (optLevel == 2) /* additional check only for high modes */
-                      && (opt[cur].mlen > 0) /* interrupt a match */
+                    if ((optLevel == 2) /* additional check only for high modes */
+                      && (opt[cur].litlen == 0) /* interrupt a match */
                       && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */
                     {
                         /* check next position, in case it would be cheaper */
@@ -1209,33 +1209,35 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                         int withMoreLiterals = price + LL_INCPRICE(litlen+1);
                         DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f",
                                 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
-                        if (with1literal < withMoreLiterals) {
-                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) !!!", ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals));
-                            /* do not take this literal */
-                        } else {
-                            opt[cur].mlen = opt[cur-1].mlen;
-                            opt[cur].off = opt[cur-1].off;
-                            opt[cur].litlen = litlen;
-                            opt[cur].price = price;
+                        if ( (with1literal < withMoreLiterals)
+                          && (with1literal < opt[cur+1].price) ) {
+                            /* update offset history - before it disappears */
+                            U32 const prev = cur - opt[cur].mlen;
+                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
+                            assert(cur >= opt[cur].mlen);
+                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
+                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
+                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
+                            opt[cur+1] = opt[cur];  /* mlen & offbase */
+                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
+                            opt[cur+1].litlen = 1;
+                            opt[cur+1].price = with1literal;
+                            if (last_pos < cur+1) last_pos = cur+1;
                         }
-                    } else {
-                        /* normal case: take the literal, it's expected to be cheaper at position @cur */
-                        opt[cur].mlen = opt[cur-1].mlen;
-                        opt[cur].off = opt[cur-1].off;
-                        opt[cur].litlen = litlen;
-                        opt[cur].price = price;
                     }
+                    opt[cur].mlen = opt[cur-1].mlen;
+                    opt[cur].off = opt[cur-1].off;
+                    opt[cur].litlen = litlen;
+                    opt[cur].price = price;
+                    ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
                 } else {
-                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
-                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
-                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
                 }
             }
 
-            /* Set the repcodes of the current position. We must do it here
-             * because we rely on the repcodes of the 2nd to last sequence being
-             * correct to set the next chunks repcodes during the backward
-             * traversal.
+            /* Offset history is not updated during match comparison.
+             * Do it here, now that the match is selected and confirmed.
              */
             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
             assert(cur >= opt[cur].mlen);
@@ -1244,8 +1246,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 U32 const prev = cur - opt[cur].mlen;
                 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
-            } else {
-                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
             }
 
             /* last match must start at a minimum distance of 8 from oend */
@@ -1514,6 +1514,7 @@ size_t ZSTD_compressBlock_btultra2(
      * The compression ratio gain is generally small (~0.5% on first block),
      * the cost is 2x cpu time on first block. */
     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    //g_debuglevel = -g_debuglevel;
     if ( (ms->opt.litLengthSum==0)   /* first block */
       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
@@ -1523,6 +1524,7 @@ size_t ZSTD_compressBlock_btultra2(
         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
     }
 
+    //g_debuglevel = -g_debuglevel;
     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 #endif

From 04a6c8cbe240495f2dcf7ab108bec327ef245813 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" 
Date: Wed, 31 Jan 2024 11:07:15 -0500
Subject: [PATCH 163/283] Update Dependency in Intel CET Test; Re-Enable Test

Motivated by #3884.
---
 .github/workflows/dev-short-tests.yml | 41 +++++++++++++--------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index b834ce2d745..020f8419e25 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -577,6 +577,25 @@ jobs:
         make -C programs zstd-pgo
         ./programs/zstd -b
 
+  intel-cet-compatibility:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - name: Build Zstd
+      run: |
+        make -j zstd V=1
+        readelf -n zstd
+    - name: Get Intel SDE
+      run: |
+        curl -LO https://downloadmirror.intel.com/813591/sde-external-9.33.0-2024-01-07-lin.tar.xz
+        tar xJvf sde-external-9.33.0-2024-01-07-lin.tar.xz
+    - name: Configure Permissions
+      run: |
+        echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+    - name: Run Under SDE
+      run: |
+        sde-external-9.33.0-2024-01-07-lin/sde -cet -cet-raise 0 -cet-endbr-exe -cet-stderr -cet-abort -- ./zstd -b3
+
 
 # Failing tests, for reference
 
@@ -600,25 +619,3 @@ jobs:
 #    - name: make check
 #      run: |
 #        make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check
-
-
-# intel-cet-compatibility tests are currently failing, during download stage
-#
-#  intel-cet-compatibility:
-#    runs-on: ubuntu-latest
-#    steps:
-#    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
-#    - name: Build Zstd
-#      run: |
-#        make -j zstd V=1
-#        readelf -n zstd
-#    - name: Get Intel SDE
-#      run: |
-#        curl -LO https://downloadmirror.intel.com/684899/sde-external-9.0.0-2021-11-07-lin.tar.xz
-#        tar xJvf sde-external-9.0.0-2021-11-07-lin.tar.xz
-#    - name: Configure Permissions
-#      run: |
-#        echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-#    - name: Run Under SDE
-#      run: |
-#        sde-external-9.0.0-2021-11-07-lin/sde -cet -cet-raise 0 -cet-endbr-exe -cet-stderr -cet-abort -- ./zstd -b3

From d31018e223691256aac9c426fcfbeec735a2d6ab Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 3 Feb 2024 14:26:18 -0800
Subject: [PATCH 164/283] finally, a version that generalizes well

While it's not always strictly a win,
it's a win for files that see a noticeable compression ratio increase,
while it's a very small noise for other files.

Downside is, this patch is less efficient for arrays of 32-bit integers
than the previous patch which was introducing losses for other files,
but it's still a net improvement on this scenario.
---
 lib/compress/zstd_opt.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index d7fb191cc9d..2c4af2f062a 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1178,9 +1178,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                         opt[pos].litlen = 0; /* end of match */
                         opt[pos].price = sequencePrice + LL_PRICE(0);
                     }
-                    opt[pos].price = ZSTD_MAX_PRICE;
                 }
                 last_pos = pos-1;
+                opt[pos].price = ZSTD_MAX_PRICE;
+                opt[pos+1].price = ZSTD_MAX_PRICE;
             }
         }
 
@@ -1197,39 +1198,47 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                 + LL_INCPRICE(litlen);
                 assert(price < 1000000000); /* overflow check */
                 if (price <= opt[cur].price) {
+                    ZSTD_optimal_t const prevMatch = opt[cur];
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
+                    opt[cur].mlen = opt[cur-1].mlen;
+                    opt[cur].off = opt[cur-1].off;
+                    opt[cur].litlen = litlen;
+                    opt[cur].price = price;
+                    ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
                     if ((optLevel == 2) /* additional check only for high modes */
-                      && (opt[cur].litlen == 0) /* interrupt a match */
+                      && (prevMatch.litlen == 0) /* interrupt a match */
                       && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */
                     {
                         /* check next position, in case it would be cheaper */
-                        int with1literal = opt[cur].price + LL_INCPRICE(1);
-                        int withMoreLiterals = price + LL_INCPRICE(litlen+1);
+                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
+                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
                         DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f",
                                 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
                         if ( (with1literal < withMoreLiterals)
                           && (with1literal < opt[cur+1].price) ) {
                             /* update offset history - before it disappears */
-                            U32 const prev = cur - opt[cur].mlen;
-                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
-                            assert(cur >= opt[cur].mlen);
+                            U32 const prev = cur - prevMatch.mlen;
+                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
+                            assert(cur >= prevMatch.mlen);
                             DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
                                         ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
                                         newReps.rep[0], newReps.rep[1], newReps.rep[2] );
-                            opt[cur+1] = opt[cur];  /* mlen & offbase */
+                            opt[cur+1] = prevMatch;  /* mlen & offbase */
                             ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
                             opt[cur+1].litlen = 1;
                             opt[cur+1].price = with1literal;
                             if (last_pos < cur+1) last_pos = cur+1;
+#if 0
+                            /* but, for following byte, get back to literal run */
+                            opt[cur+2] = opt[cur];
+                            opt[cur+2].litlen += 2;
+                            opt[cur+2].price += LIT_PRICE(ip+cur) + LIT_PRICE(ip+cur+1) + LL_INCPRICE(litlen+1) + LL_INCPRICE(litlen+2);
+                            if (last_pos < cur+2) last_pos = cur+2;
+#endif
                         }
                     }
-                    opt[cur].mlen = opt[cur-1].mlen;
-                    opt[cur].off = opt[cur-1].off;
-                    opt[cur].litlen = litlen;
-                    opt[cur].price = price;
-                    ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
                 } else {
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
@@ -1316,6 +1325,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                         }
             }   }   }
             opt[last_pos+1].price = ZSTD_MAX_PRICE;
+            opt[last_pos+2].price = ZSTD_MAX_PRICE;
         }  /* for (cur = 1; cur <= last_pos; cur++) */
 
         lastStretch = opt[last_pos];

From 8168a451e58261baf9a53b0c1bcfdaff2ba0480d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 3 Feb 2024 17:26:47 -0800
Subject: [PATCH 165/283] minor optimization, mostly for clarity

---
 lib/compress/zstd_opt.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 2c4af2f062a..bbf367b24de 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1181,7 +1181,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 }
                 last_pos = pos-1;
                 opt[pos].price = ZSTD_MAX_PRICE;
-                opt[pos+1].price = ZSTD_MAX_PRICE;
             }
         }
 
@@ -1202,19 +1201,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                    opt[cur].mlen = opt[cur-1].mlen;
-                    opt[cur].off = opt[cur-1].off;
+                    opt[cur] = opt[cur-1];
                     opt[cur].litlen = litlen;
                     opt[cur].price = price;
-                    ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
                     if ((optLevel == 2) /* additional check only for high modes */
                       && (prevMatch.litlen == 0) /* interrupt a match */
-                      && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */
-                    {
+                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
+                    ) {
                         /* check next position, in case it would be cheaper */
                         int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
                         int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
-                        DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f",
+                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
                                 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
                         if ( (with1literal < withMoreLiterals)
                           && (with1literal < opt[cur+1].price) ) {
@@ -1230,13 +1227,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                             opt[cur+1].litlen = 1;
                             opt[cur+1].price = with1literal;
                             if (last_pos < cur+1) last_pos = cur+1;
-#if 0
-                            /* but, for following byte, get back to literal run */
-                            opt[cur+2] = opt[cur];
-                            opt[cur+2].litlen += 2;
-                            opt[cur+2].price += LIT_PRICE(ip+cur) + LIT_PRICE(ip+cur+1) + LL_INCPRICE(litlen+1) + LL_INCPRICE(litlen+2);
-                            if (last_pos < cur+2) last_pos = cur+2;
-#endif
                         }
                     }
                 } else {
@@ -1325,7 +1315,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                         }
             }   }   }
             opt[last_pos+1].price = ZSTD_MAX_PRICE;
-            opt[last_pos+2].price = ZSTD_MAX_PRICE;
         }  /* for (cur = 1; cur <= last_pos; cur++) */
 
         lastStretch = opt[last_pos];

From e5af24c5fa82186d61ee1ed4dfe161d65a1c1a7d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 3 Feb 2024 17:48:29 -0800
Subject: [PATCH 166/283] fixed wrong assert

---
 lib/compress/zstd_opt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index bbf367b24de..bcebfaa3559 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1325,7 +1325,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         assert(opt[0].mlen == 0);
         assert(last_pos >= lastStretch.mlen);
         assert(cur == last_pos - lastStretch.mlen);
-        assert(lastStretch.rep[0] != 0);
 
         if (lastStretch.mlen==0) {
             /* no solution : all matches have been converted into literals */

From 9ae3bf5ee2ea676594b84afc6a3a4edcc22a19bf Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 3 Feb 2024 17:52:50 -0800
Subject: [PATCH 167/283] update compression results

good news: there are only improvements
---
 tests/regression/results.csv | 84 ++++++++++++++++++------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/tests/regression/results.csv b/tests/regression/results.csv
index d072c0d850b..6d398c0e7f6 100644
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@@ -11,10 +11,10 @@ silesia.tar,                        level 6,                            compress
 silesia.tar,                        level 7,                            compress simple,                    4579828
 silesia.tar,                        level 9,                            compress simple,                    4555448
 silesia.tar,                        level 13,                           compress simple,                    4502956
-silesia.tar,                        level 16,                           compress simple,                    4360546
-silesia.tar,                        level 19,                           compress simple,                    4265911
+silesia.tar,                        level 16,                           compress simple,                    4360529
+silesia.tar,                        level 19,                           compress simple,                    4261475
 silesia.tar,                        uncompressed literals,              compress simple,                    4854086
-silesia.tar,                        uncompressed literals optimal,      compress simple,                    4265911
+silesia.tar,                        uncompressed literals optimal,      compress simple,                    4261475
 silesia.tar,                        huffman literals,                   compress simple,                    6179047
 github.tar,                         level -5,                           compress simple,                    52115
 github.tar,                         level -3,                           compress simple,                    45678
@@ -45,8 +45,8 @@ silesia,                            level 6,                            compress
 silesia,                            level 7,                            compress cctx,                      4570271
 silesia,                            level 9,                            compress cctx,                      4545850
 silesia,                            level 13,                           compress cctx,                      4493990
-silesia,                            level 16,                           compress cctx,                      4360041
-silesia,                            level 19,                           compress cctx,                      4296055
+silesia,                            level 16,                           compress cctx,                      4359969
+silesia,                            level 19,                           compress cctx,                      4267082
 silesia,                            long distance mode,                 compress cctx,                      4842075
 silesia,                            multithreaded,                      compress cctx,                      4842075
 silesia,                            multithreaded long distance mode,   compress cctx,                      4842075
@@ -55,7 +55,7 @@ silesia,                            small hash log,                     compress
 silesia,                            small chain log,                    compress cctx,                      4912197
 silesia,                            explicit params,                    compress cctx,                      4794318
 silesia,                            uncompressed literals,              compress cctx,                      4842075
-silesia,                            uncompressed literals optimal,      compress cctx,                      4296055
+silesia,                            uncompressed literals optimal,      compress cctx,                      4267082
 silesia,                            huffman literals,                   compress cctx,                      6172202
 silesia,                            multithreaded with advanced params, compress cctx,                      4842075
 github,                             level -5,                           compress cctx,                      204407
@@ -109,8 +109,8 @@ silesia,                            level 6,                            zstdcli,
 silesia,                            level 7,                            zstdcli,                            4570319
 silesia,                            level 9,                            zstdcli,                            4545898
 silesia,                            level 13,                           zstdcli,                            4494038
-silesia,                            level 16,                           zstdcli,                            4360089
-silesia,                            level 19,                           zstdcli,                            4296103
+silesia,                            level 16,                           zstdcli,                            4360017
+silesia,                            level 19,                           zstdcli,                            4267130
 silesia,                            long distance mode,                 zstdcli,                            4833785
 silesia,                            multithreaded,                      zstdcli,                            4842123
 silesia,                            multithreaded long distance mode,   zstdcli,                            4833785
@@ -119,7 +119,7 @@ silesia,                            small hash log,                     zstdcli,
 silesia,                            small chain log,                    zstdcli,                            4912245
 silesia,                            explicit params,                    zstdcli,                            4795840
 silesia,                            uncompressed literals,              zstdcli,                            5120614
-silesia,                            uncompressed literals optimal,      zstdcli,                            4319566
+silesia,                            uncompressed literals optimal,      zstdcli,                            4317385
 silesia,                            huffman literals,                   zstdcli,                            5321417
 silesia,                            multithreaded with advanced params, zstdcli,                            5120614
 silesia.tar,                        level -5,                           zstdcli,                            6862049
@@ -134,8 +134,8 @@ silesia.tar,                        level 6,                            zstdcli,
 silesia.tar,                        level 7,                            zstdcli,                            4581791
 silesia.tar,                        level 9,                            zstdcli,                            4555452
 silesia.tar,                        level 13,                           zstdcli,                            4502960
-silesia.tar,                        level 16,                           zstdcli,                            4360550
-silesia.tar,                        level 19,                           zstdcli,                            4265915
+silesia.tar,                        level 16,                           zstdcli,                            4360533
+silesia.tar,                        level 19,                           zstdcli,                            4261479
 silesia.tar,                        no source size,                     zstdcli,                            4854160
 silesia.tar,                        long distance mode,                 zstdcli,                            4845745
 silesia.tar,                        multithreaded,                      zstdcli,                            4854164
@@ -145,7 +145,7 @@ silesia.tar,                        small hash log,                     zstdcli,
 silesia.tar,                        small chain log,                    zstdcli,                            4917022
 silesia.tar,                        explicit params,                    zstdcli,                            4821112
 silesia.tar,                        uncompressed literals,              zstdcli,                            5122571
-silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4310145
+silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4308929
 silesia.tar,                        huffman literals,                   zstdcli,                            5342074
 silesia.tar,                        multithreaded with advanced params, zstdcli,                            5122571
 github,                             level -5,                           zstdcli,                            206407
@@ -248,8 +248,8 @@ silesia,                            level 11 row 2,                     advanced
 silesia,                            level 12 row 1,                     advanced one pass,                  4505658
 silesia,                            level 12 row 2,                     advanced one pass,                  4503429
 silesia,                            level 13,                           advanced one pass,                  4493990
-silesia,                            level 16,                           advanced one pass,                  4360041
-silesia,                            level 19,                           advanced one pass,                  4296055
+silesia,                            level 16,                           advanced one pass,                  4359969
+silesia,                            level 19,                           advanced one pass,                  4267082
 silesia,                            no source size,                     advanced one pass,                  4842075
 silesia,                            long distance mode,                 advanced one pass,                  4833710
 silesia,                            multithreaded,                      advanced one pass,                  4842075
@@ -259,7 +259,7 @@ silesia,                            small hash log,                     advanced
 silesia,                            small chain log,                    advanced one pass,                  4912197
 silesia,                            explicit params,                    advanced one pass,                  4795840
 silesia,                            uncompressed literals,              advanced one pass,                  5120566
-silesia,                            uncompressed literals optimal,      advanced one pass,                  4319518
+silesia,                            uncompressed literals optimal,      advanced one pass,                  4317337
 silesia,                            huffman literals,                   advanced one pass,                  5321369
 silesia,                            multithreaded with advanced params, advanced one pass,                  5120566
 silesia.tar,                        level -5,                           advanced one pass,                  6861055
@@ -282,8 +282,8 @@ silesia.tar,                        level 11 row 2,                     advanced
 silesia.tar,                        level 12 row 1,                     advanced one pass,                  4514517
 silesia.tar,                        level 12 row 2,                     advanced one pass,                  4514007
 silesia.tar,                        level 13,                           advanced one pass,                  4502956
-silesia.tar,                        level 16,                           advanced one pass,                  4360546
-silesia.tar,                        level 19,                           advanced one pass,                  4265911
+silesia.tar,                        level 16,                           advanced one pass,                  4360529
+silesia.tar,                        level 19,                           advanced one pass,                  4261475
 silesia.tar,                        no source size,                     advanced one pass,                  4854086
 silesia.tar,                        long distance mode,                 advanced one pass,                  4840452
 silesia.tar,                        multithreaded,                      advanced one pass,                  4854160
@@ -293,7 +293,7 @@ silesia.tar,                        small hash log,                     advanced
 silesia.tar,                        small chain log,                    advanced one pass,                  4917041
 silesia.tar,                        explicit params,                    advanced one pass,                  4807274
 silesia.tar,                        uncompressed literals,              advanced one pass,                  5122473
-silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4310141
+silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4308925
 silesia.tar,                        huffman literals,                   advanced one pass,                  5341705
 silesia.tar,                        multithreaded with advanced params, advanced one pass,                  5122567
 github,                             level -5,                           advanced one pass,                  204407
@@ -566,8 +566,8 @@ silesia,                            level 11 row 2,                     advanced
 silesia,                            level 12 row 1,                     advanced one pass small out,        4505658
 silesia,                            level 12 row 2,                     advanced one pass small out,        4503429
 silesia,                            level 13,                           advanced one pass small out,        4493990
-silesia,                            level 16,                           advanced one pass small out,        4360041
-silesia,                            level 19,                           advanced one pass small out,        4296055
+silesia,                            level 16,                           advanced one pass small out,        4359969
+silesia,                            level 19,                           advanced one pass small out,        4267082
 silesia,                            no source size,                     advanced one pass small out,        4842075
 silesia,                            long distance mode,                 advanced one pass small out,        4833710
 silesia,                            multithreaded,                      advanced one pass small out,        4842075
@@ -577,7 +577,7 @@ silesia,                            small hash log,                     advanced
 silesia,                            small chain log,                    advanced one pass small out,        4912197
 silesia,                            explicit params,                    advanced one pass small out,        4795840
 silesia,                            uncompressed literals,              advanced one pass small out,        5120566
-silesia,                            uncompressed literals optimal,      advanced one pass small out,        4319518
+silesia,                            uncompressed literals optimal,      advanced one pass small out,        4317337
 silesia,                            huffman literals,                   advanced one pass small out,        5321369
 silesia,                            multithreaded with advanced params, advanced one pass small out,        5120566
 silesia.tar,                        level -5,                           advanced one pass small out,        6861055
@@ -600,8 +600,8 @@ silesia.tar,                        level 11 row 2,                     advanced
 silesia.tar,                        level 12 row 1,                     advanced one pass small out,        4514517
 silesia.tar,                        level 12 row 2,                     advanced one pass small out,        4514007
 silesia.tar,                        level 13,                           advanced one pass small out,        4502956
-silesia.tar,                        level 16,                           advanced one pass small out,        4360546
-silesia.tar,                        level 19,                           advanced one pass small out,        4265911
+silesia.tar,                        level 16,                           advanced one pass small out,        4360529
+silesia.tar,                        level 19,                           advanced one pass small out,        4261475
 silesia.tar,                        no source size,                     advanced one pass small out,        4854086
 silesia.tar,                        long distance mode,                 advanced one pass small out,        4840452
 silesia.tar,                        multithreaded,                      advanced one pass small out,        4854160
@@ -611,7 +611,7 @@ silesia.tar,                        small hash log,                     advanced
 silesia.tar,                        small chain log,                    advanced one pass small out,        4917041
 silesia.tar,                        explicit params,                    advanced one pass small out,        4807274
 silesia.tar,                        uncompressed literals,              advanced one pass small out,        5122473
-silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4310141
+silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4308925
 silesia.tar,                        huffman literals,                   advanced one pass small out,        5341705
 silesia.tar,                        multithreaded with advanced params, advanced one pass small out,        5122567
 github,                             level -5,                           advanced one pass small out,        204407
@@ -884,8 +884,8 @@ silesia,                            level 11 row 2,                     advanced
 silesia,                            level 12 row 1,                     advanced streaming,                 4505658
 silesia,                            level 12 row 2,                     advanced streaming,                 4503429
 silesia,                            level 13,                           advanced streaming,                 4493990
-silesia,                            level 16,                           advanced streaming,                 4360041
-silesia,                            level 19,                           advanced streaming,                 4296055
+silesia,                            level 16,                           advanced streaming,                 4359969
+silesia,                            level 19,                           advanced streaming,                 4267082
 silesia,                            no source size,                     advanced streaming,                 4842039
 silesia,                            long distance mode,                 advanced streaming,                 4833710
 silesia,                            multithreaded,                      advanced streaming,                 4842075
@@ -895,7 +895,7 @@ silesia,                            small hash log,                     advanced
 silesia,                            small chain log,                    advanced streaming,                 4912197
 silesia,                            explicit params,                    advanced streaming,                 4795857
 silesia,                            uncompressed literals,              advanced streaming,                 5120566
-silesia,                            uncompressed literals optimal,      advanced streaming,                 4319518
+silesia,                            uncompressed literals optimal,      advanced streaming,                 4317337
 silesia,                            huffman literals,                   advanced streaming,                 5321370
 silesia,                            multithreaded with advanced params, advanced streaming,                 5120566
 silesia.tar,                        level -5,                           advanced streaming,                 6856523
@@ -918,8 +918,8 @@ silesia.tar,                        level 11 row 2,                     advanced
 silesia.tar,                        level 12 row 1,                     advanced streaming,                 4514514
 silesia.tar,                        level 12 row 2,                     advanced streaming,                 4514003
 silesia.tar,                        level 13,                           advanced streaming,                 4502956
-silesia.tar,                        level 16,                           advanced streaming,                 4360546
-silesia.tar,                        level 19,                           advanced streaming,                 4265911
+silesia.tar,                        level 16,                           advanced streaming,                 4360529
+silesia.tar,                        level 19,                           advanced streaming,                 4261475
 silesia.tar,                        no source size,                     advanced streaming,                 4859267
 silesia.tar,                        long distance mode,                 advanced streaming,                 4840452
 silesia.tar,                        multithreaded,                      advanced streaming,                 4854160
@@ -929,7 +929,7 @@ silesia.tar,                        small hash log,                     advanced
 silesia.tar,                        small chain log,                    advanced streaming,                 4917021
 silesia.tar,                        explicit params,                    advanced streaming,                 4807288
 silesia.tar,                        uncompressed literals,              advanced streaming,                 5127423
-silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4310141
+silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4308925
 silesia.tar,                        huffman literals,                   advanced streaming,                 5341712
 silesia.tar,                        multithreaded with advanced params, advanced streaming,                 5122567
 github,                             level -5,                           advanced streaming,                 204407
@@ -1194,11 +1194,11 @@ silesia,                            level 6,                            old stre
 silesia,                            level 7,                            old streaming,                      4570271
 silesia,                            level 9,                            old streaming,                      4545850
 silesia,                            level 13,                           old streaming,                      4493990
-silesia,                            level 16,                           old streaming,                      4360041
-silesia,                            level 19,                           old streaming,                      4296055
+silesia,                            level 16,                           old streaming,                      4359969
+silesia,                            level 19,                           old streaming,                      4267082
 silesia,                            no source size,                     old streaming,                      4842039
 silesia,                            uncompressed literals,              old streaming,                      4842075
-silesia,                            uncompressed literals optimal,      old streaming,                      4296055
+silesia,                            uncompressed literals optimal,      old streaming,                      4267082
 silesia,                            huffman literals,                   old streaming,                      6172207
 silesia.tar,                        level -5,                           old streaming,                      6856523
 silesia.tar,                        level -3,                           old streaming,                      6505954
@@ -1212,11 +1212,11 @@ silesia.tar,                        level 6,                            old stre
 silesia.tar,                        level 7,                            old streaming,                      4579823
 silesia.tar,                        level 9,                            old streaming,                      4555445
 silesia.tar,                        level 13,                           old streaming,                      4502956
-silesia.tar,                        level 16,                           old streaming,                      4360546
-silesia.tar,                        level 19,                           old streaming,                      4265911
+silesia.tar,                        level 16,                           old streaming,                      4360529
+silesia.tar,                        level 19,                           old streaming,                      4261475
 silesia.tar,                        no source size,                     old streaming,                      4859267
 silesia.tar,                        uncompressed literals,              old streaming,                      4859271
-silesia.tar,                        uncompressed literals optimal,      old streaming,                      4265911
+silesia.tar,                        uncompressed literals optimal,      old streaming,                      4261475
 silesia.tar,                        huffman literals,                   old streaming,                      6179056
 github,                             level -5,                           old streaming,                      204407
 github,                             level -5 with dict,                 old streaming,                      45832
@@ -1296,8 +1296,8 @@ silesia,                            level 6,                            old stre
 silesia,                            level 7,                            old streaming advanced,             4570271
 silesia,                            level 9,                            old streaming advanced,             4545850
 silesia,                            level 13,                           old streaming advanced,             4493990
-silesia,                            level 16,                           old streaming advanced,             4360041
-silesia,                            level 19,                           old streaming advanced,             4296055
+silesia,                            level 16,                           old streaming advanced,             4359969
+silesia,                            level 19,                           old streaming advanced,             4267082
 silesia,                            no source size,                     old streaming advanced,             4842039
 silesia,                            long distance mode,                 old streaming advanced,             4842075
 silesia,                            multithreaded,                      old streaming advanced,             4842075
@@ -1307,7 +1307,7 @@ silesia,                            small hash log,                     old stre
 silesia,                            small chain log,                    old streaming advanced,             4912197
 silesia,                            explicit params,                    old streaming advanced,             4795857
 silesia,                            uncompressed literals,              old streaming advanced,             4842075
-silesia,                            uncompressed literals optimal,      old streaming advanced,             4296055
+silesia,                            uncompressed literals optimal,      old streaming advanced,             4267082
 silesia,                            huffman literals,                   old streaming advanced,             6172207
 silesia,                            multithreaded with advanced params, old streaming advanced,             4842075
 silesia.tar,                        level -5,                           old streaming advanced,             6856523
@@ -1322,8 +1322,8 @@ silesia.tar,                        level 6,                            old stre
 silesia.tar,                        level 7,                            old streaming advanced,             4579823
 silesia.tar,                        level 9,                            old streaming advanced,             4555445
 silesia.tar,                        level 13,                           old streaming advanced,             4502956
-silesia.tar,                        level 16,                           old streaming advanced,             4360546
-silesia.tar,                        level 19,                           old streaming advanced,             4265911
+silesia.tar,                        level 16,                           old streaming advanced,             4360529
+silesia.tar,                        level 19,                           old streaming advanced,             4261475
 silesia.tar,                        no source size,                     old streaming advanced,             4859267
 silesia.tar,                        long distance mode,                 old streaming advanced,             4859271
 silesia.tar,                        multithreaded,                      old streaming advanced,             4859271
@@ -1333,7 +1333,7 @@ silesia.tar,                        small hash log,                     old stre
 silesia.tar,                        small chain log,                    old streaming advanced,             4917021
 silesia.tar,                        explicit params,                    old streaming advanced,             4807288
 silesia.tar,                        uncompressed literals,              old streaming advanced,             4859271
-silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4265911
+silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4261475
 silesia.tar,                        huffman literals,                   old streaming advanced,             6179056
 silesia.tar,                        multithreaded with advanced params, old streaming advanced,             4859271
 github,                             level -5,                           old streaming advanced,             213265

From 5474edbe6016175453d09eca139566baefe0b97b Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 3 Feb 2024 19:31:53 -0800
Subject: [PATCH 168/283] fixed wrong assert

by introducing ZSTD_OPT_SIZE
---
 lib/compress/zstd_compress.c          | 8 ++++----
 lib/compress/zstd_compress_internal.h | 5 +++--
 lib/compress/zstd_opt.c               | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 23c517d2be0..f8abbbbd91b 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1661,8 +1661,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
       + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
       + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
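       + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+2) * sizeof(ZSTD_match_t))
-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+2) * sizeof(ZSTD_optimal_t));
+      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
     size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)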
                                             ? ZSTD_cwksp_aligned_alloc_size(hSize)
                                             : 0;
@@ -2045,8 +2045,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
         ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
         ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
         ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
-        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+2) * sizeof(ZSTD_match_t));
-        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+2) * sizeof(ZSTD_optimal_t));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
     }
 
     ms->cParams = *cParams;
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index ec34f2a7749..dae8526d461 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -168,14 +168,15 @@ typedef struct {
 
 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
 
+#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+2)
 typedef struct {
     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
     unsigned* litFreq;           /* table of literals statistics, of size 256 */
     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
-    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+2 */
-    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+2 */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_SIZE */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
 
     U32  litSum;                 /* nb of literals */
     U32  litLengthSum;           /* nb of litLength codes */
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index bcebfaa3559..eb86470629d 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1361,7 +1361,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                         last_pos, cur); (void)last_pos;
-            assert(storeEnd < ZSTD_OPT_NUM);
+            assert(storeEnd < ZSTD_OPT_SIZE);
             DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                         storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
             if (lastStretch.litlen > 0) {

From 0ae21d8c3170741e4005c877d3c300a6034601ec Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 3 Feb 2024 19:32:59 -0800
Subject: [PATCH 169/283] removed trace control

---
 lib/compress/zstd_opt.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index eb86470629d..04587e855aa 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1512,7 +1512,6 @@ size_t ZSTD_compressBlock_btultra2(
      * The compression ratio gain is generally small (~0.5% on first block),
      * the cost is 2x cpu time on first block. */
     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
-    //g_debuglevel = -g_debuglevel;
     if ( (ms->opt.litLengthSum==0)   /* first block */
       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
@@ -1522,7 +1521,6 @@ size_t ZSTD_compressBlock_btultra2(
         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
     }
 
-    //g_debuglevel = -g_debuglevel;
     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 #endif

From fe2e2ad36d434d0989ca669d3a4f4d60f1cb907b Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 3 Feb 2024 19:57:38 -0800
Subject: [PATCH 170/283] use ZSTD_memcpy()

which can be redirected in Linux kernel mode
---
 lib/compress/zstd_opt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 04587e855aa..20a30406a31 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1138,7 +1138,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
              */
             opt[0].price = LL_PRICE(litlen);
             ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
-            memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
+            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
 
             /* large match -> immediate encoding */
             {   U32 const maxML = matches[nbMatches-1].len;

From 9fed5ef108d63ff25964574c2ec980e578b1adbc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Feb 2024 05:43:28 +0000
Subject: [PATCH 171/283] Bump microsoft/setup-msbuild from 1.3.2 to 2.0.0

Bumps [microsoft/setup-msbuild](https://github.com/microsoft/setup-msbuild) from 1.3.2 to 2.0.0.
- [Release notes](https://github.com/microsoft/setup-msbuild/releases)
- [Changelog](https://github.com/microsoft/setup-msbuild/blob/main/building-release.md)
- [Commits](https://github.com/microsoft/setup-msbuild/compare/031090342aeefe171e49f3820f3b52110c66e402...6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce)

---
updated-dependencies:
- dependency-name: microsoft/setup-msbuild
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/dev-short-tests.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 28bb7098027..52e88dfeede 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -271,7 +271,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@031090342aeefe171e49f3820f3b52110c66e402 # tag=v1.3
+      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v1.3
     - name: Build
       working-directory: ${{env.GITHUB_WORKSPACE}}
       run: |
@@ -298,7 +298,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@031090342aeefe171e49f3820f3b52110c66e402 # tag=v1.3
+      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v1.3
     - name: Build ${{matrix.name}}
       working-directory: ${{env.GITHUB_WORKSPACE}}
       # See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
@@ -485,7 +485,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@031090342aeefe171e49f3820f3b52110c66e402 # tag=v1.3
+      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v1.3
     - name: Build and run tests
       working-directory: ${{env.GITHUB_WORKSPACE}}
       env:

From 0d9fb5dc3394161097dc54642bd793e6de3f7593 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sun, 4 Feb 2024 22:45:18 -0800
Subject: [PATCH 172/283] fix msbuild action version number

---
 .github/workflows/dev-short-tests.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 52e88dfeede..3123e129bc3 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -271,7 +271,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v1.3
+      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
     - name: Build
       working-directory: ${{env.GITHUB_WORKSPACE}}
       run: |
@@ -298,7 +298,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v1.3
+      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
     - name: Build ${{matrix.name}}
       working-directory: ${{env.GITHUB_WORKSPACE}}
       # See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
@@ -485,7 +485,7 @@ jobs:
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
     - name: Add MSBuild to PATH
-      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v1.3
+      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
     - name: Build and run tests
       working-directory: ${{env.GITHUB_WORKSPACE}}
       env:

From 641749fc0935b6905b2fcfaa362cedfc631f5960 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 5 Feb 2024 00:36:10 -0800
Subject: [PATCH 173/283] fix uasan dictionary_stream_round_trip fuzz test

---
 lib/compress/zstd_opt.c | 6 ++++--
 tests/Makefile          | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 20a30406a31..eed3319299e 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -267,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                 const optState_t* const optPtr,
                                 int optLevel)
 {
+    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
     if (litLength == 0) return 0;
 
     if (!ZSTD_compressedLiterals(optPtr))
@@ -1204,7 +1205,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     opt[cur] = opt[cur-1];
                     opt[cur].litlen = litlen;
                     opt[cur].price = price;
-                    if ((optLevel == 2) /* additional check only for high modes */
+                    if ( (optLevel == 2) /* additional check only for high modes */
                       && (prevMatch.litlen == 0) /* interrupt a match */
                       && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
                     ) {
@@ -1278,7 +1279,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                 inr-istart, cur, nbMatches, longestML);
 
                     if ( (longestML > sufficient_len)
-                      || (cur + longestML >= ZSTD_OPT_NUM) ) {
+                      || (cur + longestML >= ZSTD_OPT_NUM)
+                      || (ip + cur + longestML >= iend) ) {
                         lastStretch.mlen = longestML;
                         lastStretch.off = matches[nbMatches-1].off;
                         lastStretch.litlen = 0;
diff --git a/tests/Makefile b/tests/Makefile
index c31e7500558..2f33e1d0f0b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -251,8 +251,9 @@ checkTag.o : $(ZSTDDIR)/zstd.h
 clean:
 	$(MAKE) -C $(ZSTDDIR) clean
 	$(MAKE) -C $(PRGDIR) clean
-	$(RM) -fR $(TESTARTEFACT)
-	$(RM) -rf tmp*  # some test directories are named tmp*
+	$(MAKE) -C fuzz clean
+	$(RM) -R $(TESTARTEFACT)
+	$(RM) -r tmp*  # some test directories are named tmp*
 	$(RM) $(CLEAN) core *.o *.tmp result* *.gcda dictionary *.zst \
         $(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \
         fullbench-dll$(EXT) fuzzer-dll$(EXT) zstreamtest-dll$(EXT)

From 6c35fb2e8cb826b70226856cd7442861037cca8a Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 5 Feb 2024 01:21:06 -0800
Subject: [PATCH 174/283] fix msan warnings

---
 Makefile                |  5 +++--
 lib/compress/zstd_opt.c | 14 ++++++++++----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index 87d80d16dea..11eca19ce20 100644
--- a/Makefile
+++ b/Makefile
@@ -328,8 +328,9 @@ asan-%: clean
 msan: clean
 	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" HAVE_LZMA=0   # datagen.c fails this test for no obvious reason
 
-msan-%: clean
-	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) HAVE_LZMA=0 $*
+msan-%:
+	$(MAKE) clean
+	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -j -C $(TESTDIR) HAVE_LZMA=0 $*
 
 asan32: clean
 	$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address $(MOREFLAGS)"
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index eed3319299e..25715eabba8 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1164,7 +1164,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 U32 matchNb;
                 for (pos = 1; pos < minMatch; pos++) {
                     opt[pos].price = ZSTD_MAX_PRICE;
-                    /* will be updated later on at match check */
+                    opt[pos].mlen = 0;
+                    opt[pos].litlen = litlen + pos;
                 }
                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                     U32 const offBase = matches[matchNb].off;
@@ -1205,8 +1206,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     opt[cur] = opt[cur-1];
                     opt[cur].litlen = litlen;
                     opt[cur].price = price;
-                    if ( (optLevel == 2) /* additional check only for high modes */
-                      && (prevMatch.litlen == 0) /* interrupt a match */
+                    if ( (optLevel >= 1) /* additional check only for higher modes */
+                      && (prevMatch.litlen == 0) /* replace a match */
                       && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
                     ) {
                         /* check next position, in case it would be cheaper */
@@ -1305,7 +1306,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                         if ((pos > last_pos) || (price < opt[pos].price)) {
                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
-                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
+                            while (last_pos < pos) {
+                                /* fill empty positions, for future comparisons */
+                                last_pos++;
+                                opt[last_pos].price = ZSTD_MAX_PRICE;
+                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
+                            }
                             opt[pos].mlen = mlen;
                             opt[pos].off = offset;
                             opt[pos].litlen = 0;

From 887f5b62aea77ec26a1a693ff9d8e2b1eba3a2cd Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 5 Feb 2024 01:27:22 -0800
Subject: [PATCH 175/283] update compression results for regression tests

---
 tests/regression/results.csv | 264 +++++++++++++++++------------------
 1 file changed, 132 insertions(+), 132 deletions(-)

diff --git a/tests/regression/results.csv b/tests/regression/results.csv
index 6d398c0e7f6..fc3fbe7c7e1 100644
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@@ -11,10 +11,10 @@ silesia.tar,                        level 6,                            compress
 silesia.tar,                        level 7,                            compress simple,                    4579828
 silesia.tar,                        level 9,                            compress simple,                    4555448
 silesia.tar,                        level 13,                           compress simple,                    4502956
-silesia.tar,                        level 16,                           compress simple,                    4360529
-silesia.tar,                        level 19,                           compress simple,                    4261475
+silesia.tar,                        level 16,                           compress simple,                    4360385
+silesia.tar,                        level 19,                           compress simple,                    4260939
 silesia.tar,                        uncompressed literals,              compress simple,                    4854086
-silesia.tar,                        uncompressed literals optimal,      compress simple,                    4261475
+silesia.tar,                        uncompressed literals optimal,      compress simple,                    4260939
 silesia.tar,                        huffman literals,                   compress simple,                    6179047
 github.tar,                         level -5,                           compress simple,                    52115
 github.tar,                         level -3,                           compress simple,                    45678
@@ -29,9 +29,9 @@ github.tar,                         level 7,                            compress
 github.tar,                         level 9,                            compress simple,                    36723
 github.tar,                         level 13,                           compress simple,                    35501
 github.tar,                         level 16,                           compress simple,                    40466
-github.tar,                         level 19,                           compress simple,                    32276
+github.tar,                         level 19,                           compress simple,                    32262
 github.tar,                         uncompressed literals,              compress simple,                    38831
-github.tar,                         uncompressed literals optimal,      compress simple,                    32276
+github.tar,                         uncompressed literals optimal,      compress simple,                    32262
 github.tar,                         huffman literals,                   compress simple,                    42560
 silesia,                            level -5,                           compress cctx,                      6857372
 silesia,                            level -3,                           compress cctx,                      6503412
@@ -45,8 +45,8 @@ silesia,                            level 6,                            compress
 silesia,                            level 7,                            compress cctx,                      4570271
 silesia,                            level 9,                            compress cctx,                      4545850
 silesia,                            level 13,                           compress cctx,                      4493990
-silesia,                            level 16,                           compress cctx,                      4359969
-silesia,                            level 19,                           compress cctx,                      4267082
+silesia,                            level 16,                           compress cctx,                      4359652
+silesia,                            level 19,                           compress cctx,                      4266582
 silesia,                            long distance mode,                 compress cctx,                      4842075
 silesia,                            multithreaded,                      compress cctx,                      4842075
 silesia,                            multithreaded long distance mode,   compress cctx,                      4842075
@@ -55,7 +55,7 @@ silesia,                            small hash log,                     compress
 silesia,                            small chain log,                    compress cctx,                      4912197
 silesia,                            explicit params,                    compress cctx,                      4794318
 silesia,                            uncompressed literals,              compress cctx,                      4842075
-silesia,                            uncompressed literals optimal,      compress cctx,                      4267082
+silesia,                            uncompressed literals optimal,      compress cctx,                      4266582
 silesia,                            huffman literals,                   compress cctx,                      6172202
 silesia,                            multithreaded with advanced params, compress cctx,                      4842075
 github,                             level -5,                           compress cctx,                      204407
@@ -83,9 +83,9 @@ github,                             level 9 with dict,                  compress
 github,                             level 13,                           compress cctx,                      132878
 github,                             level 13 with dict,                 compress cctx,                      39948
 github,                             level 16,                           compress cctx,                      133209
-github,                             level 16 with dict,                 compress cctx,                      37568
+github,                             level 16 with dict,                 compress cctx,                      37892
 github,                             level 19,                           compress cctx,                      132879
-github,                             level 19 with dict,                 compress cctx,                      37567
+github,                             level 19 with dict,                 compress cctx,                      37906
 github,                             long distance mode,                 compress cctx,                      141069
 github,                             multithreaded,                      compress cctx,                      141069
 github,                             multithreaded long distance mode,   compress cctx,                      141069
@@ -109,8 +109,8 @@ silesia,                            level 6,                            zstdcli,
 silesia,                            level 7,                            zstdcli,                            4570319
 silesia,                            level 9,                            zstdcli,                            4545898
 silesia,                            level 13,                           zstdcli,                            4494038
-silesia,                            level 16,                           zstdcli,                            4360017
-silesia,                            level 19,                           zstdcli,                            4267130
+silesia,                            level 16,                           zstdcli,                            4359700
+silesia,                            level 19,                           zstdcli,                            4266630
 silesia,                            long distance mode,                 zstdcli,                            4833785
 silesia,                            multithreaded,                      zstdcli,                            4842123
 silesia,                            multithreaded long distance mode,   zstdcli,                            4833785
@@ -119,7 +119,7 @@ silesia,                            small hash log,                     zstdcli,
 silesia,                            small chain log,                    zstdcli,                            4912245
 silesia,                            explicit params,                    zstdcli,                            4795840
 silesia,                            uncompressed literals,              zstdcli,                            5120614
-silesia,                            uncompressed literals optimal,      zstdcli,                            4317385
+silesia,                            uncompressed literals optimal,      zstdcli,                            4316928
 silesia,                            huffman literals,                   zstdcli,                            5321417
 silesia,                            multithreaded with advanced params, zstdcli,                            5120614
 silesia.tar,                        level -5,                           zstdcli,                            6862049
@@ -134,8 +134,8 @@ silesia.tar,                        level 6,                            zstdcli,
 silesia.tar,                        level 7,                            zstdcli,                            4581791
 silesia.tar,                        level 9,                            zstdcli,                            4555452
 silesia.tar,                        level 13,                           zstdcli,                            4502960
-silesia.tar,                        level 16,                           zstdcli,                            4360533
-silesia.tar,                        level 19,                           zstdcli,                            4261479
+silesia.tar,                        level 16,                           zstdcli,                            4360389
+silesia.tar,                        level 19,                           zstdcli,                            4260943
 silesia.tar,                        no source size,                     zstdcli,                            4854160
 silesia.tar,                        long distance mode,                 zstdcli,                            4845745
 silesia.tar,                        multithreaded,                      zstdcli,                            4854164
@@ -145,7 +145,7 @@ silesia.tar,                        small hash log,                     zstdcli,
 silesia.tar,                        small chain log,                    zstdcli,                            4917022
 silesia.tar,                        explicit params,                    zstdcli,                            4821112
 silesia.tar,                        uncompressed literals,              zstdcli,                            5122571
-silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4308929
+silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4308455
 silesia.tar,                        huffman literals,                   zstdcli,                            5342074
 silesia.tar,                        multithreaded with advanced params, zstdcli,                            5122571
 github,                             level -5,                           zstdcli,                            206407
@@ -173,9 +173,9 @@ github,                             level 9 with dict,                  zstdcli,
 github,                             level 13,                           zstdcli,                            134878
 github,                             level 13 with dict,                 zstdcli,                            41900
 github,                             level 16,                           zstdcli,                            135209
-github,                             level 16 with dict,                 zstdcli,                            39577
+github,                             level 16 with dict,                 zstdcli,                            39902
 github,                             level 19,                           zstdcli,                            134879
-github,                             level 19 with dict,                 zstdcli,                            39576
+github,                             level 19 with dict,                 zstdcli,                            39916
 github,                             long distance mode,                 zstdcli,                            138332
 github,                             multithreaded,                      zstdcli,                            138332
 github,                             multithreaded long distance mode,   zstdcli,                            138332
@@ -212,9 +212,9 @@ github.tar,                         level 9 with dict,                  zstdcli,
 github.tar,                         level 13,                           zstdcli,                            35505
 github.tar,                         level 13 with dict,                 zstdcli,                            37134
 github.tar,                         level 16,                           zstdcli,                            40470
-github.tar,                         level 16 with dict,                 zstdcli,                            33378
-github.tar,                         level 19,                           zstdcli,                            32280
-github.tar,                         level 19 with dict,                 zstdcli,                            32716
+github.tar,                         level 16 with dict,                 zstdcli,                            33379
+github.tar,                         level 19,                           zstdcli,                            32266
+github.tar,                         level 19 with dict,                 zstdcli,                            32705
 github.tar,                         no source size,                     zstdcli,                            38832
 github.tar,                         no source size with dict,           zstdcli,                            38004
 github.tar,                         long distance mode,                 zstdcli,                            40236
@@ -225,7 +225,7 @@ github.tar,                         small hash log,                     zstdcli,
 github.tar,                         small chain log,                    zstdcli,                            41673
 github.tar,                         explicit params,                    zstdcli,                            41385
 github.tar,                         uncompressed literals,              zstdcli,                            41529
-github.tar,                         uncompressed literals optimal,      zstdcli,                            35401
+github.tar,                         uncompressed literals optimal,      zstdcli,                            35360
 github.tar,                         huffman literals,                   zstdcli,                            38857
 github.tar,                         multithreaded with advanced params, zstdcli,                            41529
 silesia,                            level -5,                           advanced one pass,                  6857372
@@ -248,8 +248,8 @@ silesia,                            level 11 row 2,                     advanced
 silesia,                            level 12 row 1,                     advanced one pass,                  4505658
 silesia,                            level 12 row 2,                     advanced one pass,                  4503429
 silesia,                            level 13,                           advanced one pass,                  4493990
-silesia,                            level 16,                           advanced one pass,                  4359969
-silesia,                            level 19,                           advanced one pass,                  4267082
+silesia,                            level 16,                           advanced one pass,                  4359652
+silesia,                            level 19,                           advanced one pass,                  4266582
 silesia,                            no source size,                     advanced one pass,                  4842075
 silesia,                            long distance mode,                 advanced one pass,                  4833710
 silesia,                            multithreaded,                      advanced one pass,                  4842075
@@ -259,7 +259,7 @@ silesia,                            small hash log,                     advanced
 silesia,                            small chain log,                    advanced one pass,                  4912197
 silesia,                            explicit params,                    advanced one pass,                  4795840
 silesia,                            uncompressed literals,              advanced one pass,                  5120566
-silesia,                            uncompressed literals optimal,      advanced one pass,                  4317337
+silesia,                            uncompressed literals optimal,      advanced one pass,                  4316880
 silesia,                            huffman literals,                   advanced one pass,                  5321369
 silesia,                            multithreaded with advanced params, advanced one pass,                  5120566
 silesia.tar,                        level -5,                           advanced one pass,                  6861055
@@ -282,8 +282,8 @@ silesia.tar,                        level 11 row 2,                     advanced
 silesia.tar,                        level 12 row 1,                     advanced one pass,                  4514517
 silesia.tar,                        level 12 row 2,                     advanced one pass,                  4514007
 silesia.tar,                        level 13,                           advanced one pass,                  4502956
-silesia.tar,                        level 16,                           advanced one pass,                  4360529
-silesia.tar,                        level 19,                           advanced one pass,                  4261475
+silesia.tar,                        level 16,                           advanced one pass,                  4360385
+silesia.tar,                        level 19,                           advanced one pass,                  4260939
 silesia.tar,                        no source size,                     advanced one pass,                  4854086
 silesia.tar,                        long distance mode,                 advanced one pass,                  4840452
 silesia.tar,                        multithreaded,                      advanced one pass,                  4854160
@@ -293,7 +293,7 @@ silesia.tar,                        small hash log,                     advanced
 silesia.tar,                        small chain log,                    advanced one pass,                  4917041
 silesia.tar,                        explicit params,                    advanced one pass,                  4807274
 silesia.tar,                        uncompressed literals,              advanced one pass,                  5122473
-silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4308925
+silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4308451
 silesia.tar,                        huffman literals,                   advanced one pass,                  5341705
 silesia.tar,                        multithreaded with advanced params, advanced one pass,                  5122567
 github,                             level -5,                           advanced one pass,                  204407
@@ -397,17 +397,17 @@ github,                             level 13 with dict dds,             advanced
 github,                             level 13 with dict copy,            advanced one pass,                  39948
 github,                             level 13 with dict load,            advanced one pass,                  42624
 github,                             level 16,                           advanced one pass,                  133209
-github,                             level 16 with dict,                 advanced one pass,                  37577
-github,                             level 16 with dict dms,             advanced one pass,                  37577
-github,                             level 16 with dict dds,             advanced one pass,                  37577
-github,                             level 16 with dict copy,            advanced one pass,                  37568
-github,                             level 16 with dict load,            advanced one pass,                  42338
+github,                             level 16 with dict,                 advanced one pass,                  37902
+github,                             level 16 with dict dms,             advanced one pass,                  37902
+github,                             level 16 with dict dds,             advanced one pass,                  37902
+github,                             level 16 with dict copy,            advanced one pass,                  37892
+github,                             level 16 with dict load,            advanced one pass,                  42402
 github,                             level 19,                           advanced one pass,                  132879
-github,                             level 19 with dict,                 advanced one pass,                  37576
-github,                             level 19 with dict dms,             advanced one pass,                  37576
-github,                             level 19 with dict dds,             advanced one pass,                  37576
-github,                             level 19 with dict copy,            advanced one pass,                  37567
-github,                             level 19 with dict load,            advanced one pass,                  39613
+github,                             level 19 with dict,                 advanced one pass,                  37916
+github,                             level 19 with dict dms,             advanced one pass,                  37916
+github,                             level 19 with dict dds,             advanced one pass,                  37916
+github,                             level 19 with dict copy,            advanced one pass,                  37906
+github,                             level 19 with dict load,            advanced one pass,                  39770
 github,                             no source size,                     advanced one pass,                  136332
 github,                             no source size with dict,           advanced one pass,                  41148
 github,                             long distance mode,                 advanced one pass,                  136332
@@ -522,17 +522,17 @@ github.tar,                         level 13 with dict dds,             advanced
 github.tar,                         level 13 with dict copy,            advanced one pass,                  37130
 github.tar,                         level 13 with dict load,            advanced one pass,                  36010
 github.tar,                         level 16,                           advanced one pass,                  40466
-github.tar,                         level 16 with dict,                 advanced one pass,                  33374
-github.tar,                         level 16 with dict dms,             advanced one pass,                  33206
-github.tar,                         level 16 with dict dds,             advanced one pass,                  33206
-github.tar,                         level 16 with dict copy,            advanced one pass,                  33374
+github.tar,                         level 16 with dict,                 advanced one pass,                  33375
+github.tar,                         level 16 with dict dms,             advanced one pass,                  33207
+github.tar,                         level 16 with dict dds,             advanced one pass,                  33207
+github.tar,                         level 16 with dict copy,            advanced one pass,                  33375
 github.tar,                         level 16 with dict load,            advanced one pass,                  39081
-github.tar,                         level 19,                           advanced one pass,                  32276
-github.tar,                         level 19 with dict,                 advanced one pass,                  32712
-github.tar,                         level 19 with dict dms,             advanced one pass,                  32555
-github.tar,                         level 19 with dict dds,             advanced one pass,                  32555
-github.tar,                         level 19 with dict copy,            advanced one pass,                  32712
-github.tar,                         level 19 with dict load,            advanced one pass,                  32479
+github.tar,                         level 19,                           advanced one pass,                  32262
+github.tar,                         level 19 with dict,                 advanced one pass,                  32701
+github.tar,                         level 19 with dict dms,             advanced one pass,                  32565
+github.tar,                         level 19 with dict dds,             advanced one pass,                  32565
+github.tar,                         level 19 with dict copy,            advanced one pass,                  32701
+github.tar,                         level 19 with dict load,            advanced one pass,                  32428
 github.tar,                         no source size,                     advanced one pass,                  38831
 github.tar,                         no source size with dict,           advanced one pass,                  37995
 github.tar,                         long distance mode,                 advanced one pass,                  40252
@@ -543,7 +543,7 @@ github.tar,                         small hash log,                     advanced
 github.tar,                         small chain log,                    advanced one pass,                  41669
 github.tar,                         explicit params,                    advanced one pass,                  41385
 github.tar,                         uncompressed literals,              advanced one pass,                  41525
-github.tar,                         uncompressed literals optimal,      advanced one pass,                  35397
+github.tar,                         uncompressed literals optimal,      advanced one pass,                  35356
 github.tar,                         huffman literals,                   advanced one pass,                  38853
 github.tar,                         multithreaded with advanced params, advanced one pass,                  41525
 silesia,                            level -5,                           advanced one pass small out,        6857372
@@ -566,8 +566,8 @@ silesia,                            level 11 row 2,                     advanced
 silesia,                            level 12 row 1,                     advanced one pass small out,        4505658
 silesia,                            level 12 row 2,                     advanced one pass small out,        4503429
 silesia,                            level 13,                           advanced one pass small out,        4493990
-silesia,                            level 16,                           advanced one pass small out,        4359969
-silesia,                            level 19,                           advanced one pass small out,        4267082
+silesia,                            level 16,                           advanced one pass small out,        4359652
+silesia,                            level 19,                           advanced one pass small out,        4266582
 silesia,                            no source size,                     advanced one pass small out,        4842075
 silesia,                            long distance mode,                 advanced one pass small out,        4833710
 silesia,                            multithreaded,                      advanced one pass small out,        4842075
@@ -577,7 +577,7 @@ silesia,                            small hash log,                     advanced
 silesia,                            small chain log,                    advanced one pass small out,        4912197
 silesia,                            explicit params,                    advanced one pass small out,        4795840
 silesia,                            uncompressed literals,              advanced one pass small out,        5120566
-silesia,                            uncompressed literals optimal,      advanced one pass small out,        4317337
+silesia,                            uncompressed literals optimal,      advanced one pass small out,        4316880
 silesia,                            huffman literals,                   advanced one pass small out,        5321369
 silesia,                            multithreaded with advanced params, advanced one pass small out,        5120566
 silesia.tar,                        level -5,                           advanced one pass small out,        6861055
@@ -600,8 +600,8 @@ silesia.tar,                        level 11 row 2,                     advanced
 silesia.tar,                        level 12 row 1,                     advanced one pass small out,        4514517
 silesia.tar,                        level 12 row 2,                     advanced one pass small out,        4514007
 silesia.tar,                        level 13,                           advanced one pass small out,        4502956
-silesia.tar,                        level 16,                           advanced one pass small out,        4360529
-silesia.tar,                        level 19,                           advanced one pass small out,        4261475
+silesia.tar,                        level 16,                           advanced one pass small out,        4360385
+silesia.tar,                        level 19,                           advanced one pass small out,        4260939
 silesia.tar,                        no source size,                     advanced one pass small out,        4854086
 silesia.tar,                        long distance mode,                 advanced one pass small out,        4840452
 silesia.tar,                        multithreaded,                      advanced one pass small out,        4854160
@@ -611,7 +611,7 @@ silesia.tar,                        small hash log,                     advanced
 silesia.tar,                        small chain log,                    advanced one pass small out,        4917041
 silesia.tar,                        explicit params,                    advanced one pass small out,        4807274
 silesia.tar,                        uncompressed literals,              advanced one pass small out,        5122473
-silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4308925
+silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4308451
 silesia.tar,                        huffman literals,                   advanced one pass small out,        5341705
 silesia.tar,                        multithreaded with advanced params, advanced one pass small out,        5122567
 github,                             level -5,                           advanced one pass small out,        204407
@@ -715,17 +715,17 @@ github,                             level 13 with dict dds,             advanced
 github,                             level 13 with dict copy,            advanced one pass small out,        39948
 github,                             level 13 with dict load,            advanced one pass small out,        42624
 github,                             level 16,                           advanced one pass small out,        133209
-github,                             level 16 with dict,                 advanced one pass small out,        37577
-github,                             level 16 with dict dms,             advanced one pass small out,        37577
-github,                             level 16 with dict dds,             advanced one pass small out,        37577
-github,                             level 16 with dict copy,            advanced one pass small out,        37568
-github,                             level 16 with dict load,            advanced one pass small out,        42338
+github,                             level 16 with dict,                 advanced one pass small out,        37902
+github,                             level 16 with dict dms,             advanced one pass small out,        37902
+github,                             level 16 with dict dds,             advanced one pass small out,        37902
+github,                             level 16 with dict copy,            advanced one pass small out,        37892
+github,                             level 16 with dict load,            advanced one pass small out,        42402
 github,                             level 19,                           advanced one pass small out,        132879
-github,                             level 19 with dict,                 advanced one pass small out,        37576
-github,                             level 19 with dict dms,             advanced one pass small out,        37576
-github,                             level 19 with dict dds,             advanced one pass small out,        37576
-github,                             level 19 with dict copy,            advanced one pass small out,        37567
-github,                             level 19 with dict load,            advanced one pass small out,        39613
+github,                             level 19 with dict,                 advanced one pass small out,        37916
+github,                             level 19 with dict dms,             advanced one pass small out,        37916
+github,                             level 19 with dict dds,             advanced one pass small out,        37916
+github,                             level 19 with dict copy,            advanced one pass small out,        37906
+github,                             level 19 with dict load,            advanced one pass small out,        39770
 github,                             no source size,                     advanced one pass small out,        136332
 github,                             no source size with dict,           advanced one pass small out,        41148
 github,                             long distance mode,                 advanced one pass small out,        136332
@@ -840,17 +840,17 @@ github.tar,                         level 13 with dict dds,             advanced
 github.tar,                         level 13 with dict copy,            advanced one pass small out,        37130
 github.tar,                         level 13 with dict load,            advanced one pass small out,        36010
 github.tar,                         level 16,                           advanced one pass small out,        40466
-github.tar,                         level 16 with dict,                 advanced one pass small out,        33374
-github.tar,                         level 16 with dict dms,             advanced one pass small out,        33206
-github.tar,                         level 16 with dict dds,             advanced one pass small out,        33206
-github.tar,                         level 16 with dict copy,            advanced one pass small out,        33374
+github.tar,                         level 16 with dict,                 advanced one pass small out,        33375
+github.tar,                         level 16 with dict dms,             advanced one pass small out,        33207
+github.tar,                         level 16 with dict dds,             advanced one pass small out,        33207
+github.tar,                         level 16 with dict copy,            advanced one pass small out,        33375
 github.tar,                         level 16 with dict load,            advanced one pass small out,        39081
-github.tar,                         level 19,                           advanced one pass small out,        32276
-github.tar,                         level 19 with dict,                 advanced one pass small out,        32712
-github.tar,                         level 19 with dict dms,             advanced one pass small out,        32555
-github.tar,                         level 19 with dict dds,             advanced one pass small out,        32555
-github.tar,                         level 19 with dict copy,            advanced one pass small out,        32712
-github.tar,                         level 19 with dict load,            advanced one pass small out,        32479
+github.tar,                         level 19,                           advanced one pass small out,        32262
+github.tar,                         level 19 with dict,                 advanced one pass small out,        32701
+github.tar,                         level 19 with dict dms,             advanced one pass small out,        32565
+github.tar,                         level 19 with dict dds,             advanced one pass small out,        32565
+github.tar,                         level 19 with dict copy,            advanced one pass small out,        32701
+github.tar,                         level 19 with dict load,            advanced one pass small out,        32428
 github.tar,                         no source size,                     advanced one pass small out,        38831
 github.tar,                         no source size with dict,           advanced one pass small out,        37995
 github.tar,                         long distance mode,                 advanced one pass small out,        40252
@@ -861,7 +861,7 @@ github.tar,                         small hash log,                     advanced
 github.tar,                         small chain log,                    advanced one pass small out,        41669
 github.tar,                         explicit params,                    advanced one pass small out,        41385
 github.tar,                         uncompressed literals,              advanced one pass small out,        41525
-github.tar,                         uncompressed literals optimal,      advanced one pass small out,        35397
+github.tar,                         uncompressed literals optimal,      advanced one pass small out,        35356
 github.tar,                         huffman literals,                   advanced one pass small out,        38853
 github.tar,                         multithreaded with advanced params, advanced one pass small out,        41525
 silesia,                            level -5,                           advanced streaming,                 6854744
@@ -884,8 +884,8 @@ silesia,                            level 11 row 2,                     advanced
 silesia,                            level 12 row 1,                     advanced streaming,                 4505658
 silesia,                            level 12 row 2,                     advanced streaming,                 4503429
 silesia,                            level 13,                           advanced streaming,                 4493990
-silesia,                            level 16,                           advanced streaming,                 4359969
-silesia,                            level 19,                           advanced streaming,                 4267082
+silesia,                            level 16,                           advanced streaming,                 4359652
+silesia,                            level 19,                           advanced streaming,                 4266582
 silesia,                            no source size,                     advanced streaming,                 4842039
 silesia,                            long distance mode,                 advanced streaming,                 4833710
 silesia,                            multithreaded,                      advanced streaming,                 4842075
@@ -895,7 +895,7 @@ silesia,                            small hash log,                     advanced
 silesia,                            small chain log,                    advanced streaming,                 4912197
 silesia,                            explicit params,                    advanced streaming,                 4795857
 silesia,                            uncompressed literals,              advanced streaming,                 5120566
-silesia,                            uncompressed literals optimal,      advanced streaming,                 4317337
+silesia,                            uncompressed literals optimal,      advanced streaming,                 4316880
 silesia,                            huffman literals,                   advanced streaming,                 5321370
 silesia,                            multithreaded with advanced params, advanced streaming,                 5120566
 silesia.tar,                        level -5,                           advanced streaming,                 6856523
@@ -918,8 +918,8 @@ silesia.tar,                        level 11 row 2,                     advanced
 silesia.tar,                        level 12 row 1,                     advanced streaming,                 4514514
 silesia.tar,                        level 12 row 2,                     advanced streaming,                 4514003
 silesia.tar,                        level 13,                           advanced streaming,                 4502956
-silesia.tar,                        level 16,                           advanced streaming,                 4360529
-silesia.tar,                        level 19,                           advanced streaming,                 4261475
+silesia.tar,                        level 16,                           advanced streaming,                 4360385
+silesia.tar,                        level 19,                           advanced streaming,                 4260939
 silesia.tar,                        no source size,                     advanced streaming,                 4859267
 silesia.tar,                        long distance mode,                 advanced streaming,                 4840452
 silesia.tar,                        multithreaded,                      advanced streaming,                 4854160
@@ -929,7 +929,7 @@ silesia.tar,                        small hash log,                     advanced
 silesia.tar,                        small chain log,                    advanced streaming,                 4917021
 silesia.tar,                        explicit params,                    advanced streaming,                 4807288
 silesia.tar,                        uncompressed literals,              advanced streaming,                 5127423
-silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4308925
+silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4308451
 silesia.tar,                        huffman literals,                   advanced streaming,                 5341712
 silesia.tar,                        multithreaded with advanced params, advanced streaming,                 5122567
 github,                             level -5,                           advanced streaming,                 204407
@@ -1033,17 +1033,17 @@ github,                             level 13 with dict dds,             advanced
 github,                             level 13 with dict copy,            advanced streaming,                 39948
 github,                             level 13 with dict load,            advanced streaming,                 42624
 github,                             level 16,                           advanced streaming,                 133209
-github,                             level 16 with dict,                 advanced streaming,                 37577
-github,                             level 16 with dict dms,             advanced streaming,                 37577
-github,                             level 16 with dict dds,             advanced streaming,                 37577
-github,                             level 16 with dict copy,            advanced streaming,                 37568
-github,                             level 16 with dict load,            advanced streaming,                 42338
+github,                             level 16 with dict,                 advanced streaming,                 37902
+github,                             level 16 with dict dms,             advanced streaming,                 37902
+github,                             level 16 with dict dds,             advanced streaming,                 37902
+github,                             level 16 with dict copy,            advanced streaming,                 37892
+github,                             level 16 with dict load,            advanced streaming,                 42402
 github,                             level 19,                           advanced streaming,                 132879
-github,                             level 19 with dict,                 advanced streaming,                 37576
-github,                             level 19 with dict dms,             advanced streaming,                 37576
-github,                             level 19 with dict dds,             advanced streaming,                 37576
-github,                             level 19 with dict copy,            advanced streaming,                 37567
-github,                             level 19 with dict load,            advanced streaming,                 39613
+github,                             level 19 with dict,                 advanced streaming,                 37916
+github,                             level 19 with dict dms,             advanced streaming,                 37916
+github,                             level 19 with dict dds,             advanced streaming,                 37916
+github,                             level 19 with dict copy,            advanced streaming,                 37906
+github,                             level 19 with dict load,            advanced streaming,                 39770
 github,                             no source size,                     advanced streaming,                 136332
 github,                             no source size with dict,           advanced streaming,                 41148
 github,                             long distance mode,                 advanced streaming,                 136332
@@ -1158,17 +1158,17 @@ github.tar,                         level 13 with dict dds,             advanced
 github.tar,                         level 13 with dict copy,            advanced streaming,                 37130
 github.tar,                         level 13 with dict load,            advanced streaming,                 36010
 github.tar,                         level 16,                           advanced streaming,                 40466
-github.tar,                         level 16 with dict,                 advanced streaming,                 33374
-github.tar,                         level 16 with dict dms,             advanced streaming,                 33206
-github.tar,                         level 16 with dict dds,             advanced streaming,                 33206
-github.tar,                         level 16 with dict copy,            advanced streaming,                 33374
+github.tar,                         level 16 with dict,                 advanced streaming,                 33375
+github.tar,                         level 16 with dict dms,             advanced streaming,                 33207
+github.tar,                         level 16 with dict dds,             advanced streaming,                 33207
+github.tar,                         level 16 with dict copy,            advanced streaming,                 33375
 github.tar,                         level 16 with dict load,            advanced streaming,                 39081
-github.tar,                         level 19,                           advanced streaming,                 32276
-github.tar,                         level 19 with dict,                 advanced streaming,                 32712
-github.tar,                         level 19 with dict dms,             advanced streaming,                 32555
-github.tar,                         level 19 with dict dds,             advanced streaming,                 32555
-github.tar,                         level 19 with dict copy,            advanced streaming,                 32712
-github.tar,                         level 19 with dict load,            advanced streaming,                 32479
+github.tar,                         level 19,                           advanced streaming,                 32262
+github.tar,                         level 19 with dict,                 advanced streaming,                 32701
+github.tar,                         level 19 with dict dms,             advanced streaming,                 32565
+github.tar,                         level 19 with dict dds,             advanced streaming,                 32565
+github.tar,                         level 19 with dict copy,            advanced streaming,                 32701
+github.tar,                         level 19 with dict load,            advanced streaming,                 32428
 github.tar,                         no source size,                     advanced streaming,                 38828
 github.tar,                         no source size with dict,           advanced streaming,                 38000
 github.tar,                         long distance mode,                 advanced streaming,                 40252
@@ -1179,7 +1179,7 @@ github.tar,                         small hash log,                     advanced
 github.tar,                         small chain log,                    advanced streaming,                 41669
 github.tar,                         explicit params,                    advanced streaming,                 41385
 github.tar,                         uncompressed literals,              advanced streaming,                 41525
-github.tar,                         uncompressed literals optimal,      advanced streaming,                 35397
+github.tar,                         uncompressed literals optimal,      advanced streaming,                 35356
 github.tar,                         huffman literals,                   advanced streaming,                 38853
 github.tar,                         multithreaded with advanced params, advanced streaming,                 41525
 silesia,                            level -5,                           old streaming,                      6854744
@@ -1194,11 +1194,11 @@ silesia,                            level 6,                            old stre
 silesia,                            level 7,                            old streaming,                      4570271
 silesia,                            level 9,                            old streaming,                      4545850
 silesia,                            level 13,                           old streaming,                      4493990
-silesia,                            level 16,                           old streaming,                      4359969
-silesia,                            level 19,                           old streaming,                      4267082
+silesia,                            level 16,                           old streaming,                      4359652
+silesia,                            level 19,                           old streaming,                      4266582
 silesia,                            no source size,                     old streaming,                      4842039
 silesia,                            uncompressed literals,              old streaming,                      4842075
-silesia,                            uncompressed literals optimal,      old streaming,                      4267082
+silesia,                            uncompressed literals optimal,      old streaming,                      4266582
 silesia,                            huffman literals,                   old streaming,                      6172207
 silesia.tar,                        level -5,                           old streaming,                      6856523
 silesia.tar,                        level -3,                           old streaming,                      6505954
@@ -1212,11 +1212,11 @@ silesia.tar,                        level 6,                            old stre
 silesia.tar,                        level 7,                            old streaming,                      4579823
 silesia.tar,                        level 9,                            old streaming,                      4555445
 silesia.tar,                        level 13,                           old streaming,                      4502956
-silesia.tar,                        level 16,                           old streaming,                      4360529
-silesia.tar,                        level 19,                           old streaming,                      4261475
+silesia.tar,                        level 16,                           old streaming,                      4360385
+silesia.tar,                        level 19,                           old streaming,                      4260939
 silesia.tar,                        no source size,                     old streaming,                      4859267
 silesia.tar,                        uncompressed literals,              old streaming,                      4859271
-silesia.tar,                        uncompressed literals optimal,      old streaming,                      4261475
+silesia.tar,                        uncompressed literals optimal,      old streaming,                      4260939
 silesia.tar,                        huffman literals,                   old streaming,                      6179056
 github,                             level -5,                           old streaming,                      204407
 github,                             level -5 with dict,                 old streaming,                      45832
@@ -1243,9 +1243,9 @@ github,                             level 9 with dict,                  old stre
 github,                             level 13,                           old streaming,                      132878
 github,                             level 13 with dict,                 old streaming,                      39900
 github,                             level 16,                           old streaming,                      133209
-github,                             level 16 with dict,                 old streaming,                      37577
+github,                             level 16 with dict,                 old streaming,                      37902
 github,                             level 19,                           old streaming,                      132879
-github,                             level 19 with dict,                 old streaming,                      37576
+github,                             level 19 with dict,                 old streaming,                      37916
 github,                             no source size,                     old streaming,                      140599
 github,                             no source size with dict,           old streaming,                      40654
 github,                             uncompressed literals,              old streaming,                      136332
@@ -1276,13 +1276,13 @@ github.tar,                         level 9 with dict,                  old stre
 github.tar,                         level 13,                           old streaming,                      35501
 github.tar,                         level 13 with dict,                 old streaming,                      37130
 github.tar,                         level 16,                           old streaming,                      40466
-github.tar,                         level 16 with dict,                 old streaming,                      33374
-github.tar,                         level 19,                           old streaming,                      32276
-github.tar,                         level 19 with dict,                 old streaming,                      32712
+github.tar,                         level 16 with dict,                 old streaming,                      33375
+github.tar,                         level 19,                           old streaming,                      32262
+github.tar,                         level 19 with dict,                 old streaming,                      32701
 github.tar,                         no source size,                     old streaming,                      38828
 github.tar,                         no source size with dict,           old streaming,                      38000
 github.tar,                         uncompressed literals,              old streaming,                      38831
-github.tar,                         uncompressed literals optimal,      old streaming,                      32276
+github.tar,                         uncompressed literals optimal,      old streaming,                      32262
 github.tar,                         huffman literals,                   old streaming,                      42560
 silesia,                            level -5,                           old streaming advanced,             6854744
 silesia,                            level -3,                           old streaming advanced,             6503319
@@ -1296,8 +1296,8 @@ silesia,                            level 6,                            old stre
 silesia,                            level 7,                            old streaming advanced,             4570271
 silesia,                            level 9,                            old streaming advanced,             4545850
 silesia,                            level 13,                           old streaming advanced,             4493990
-silesia,                            level 16,                           old streaming advanced,             4359969
-silesia,                            level 19,                           old streaming advanced,             4267082
+silesia,                            level 16,                           old streaming advanced,             4359652
+silesia,                            level 19,                           old streaming advanced,             4266582
 silesia,                            no source size,                     old streaming advanced,             4842039
 silesia,                            long distance mode,                 old streaming advanced,             4842075
 silesia,                            multithreaded,                      old streaming advanced,             4842075
@@ -1307,7 +1307,7 @@ silesia,                            small hash log,                     old stre
 silesia,                            small chain log,                    old streaming advanced,             4912197
 silesia,                            explicit params,                    old streaming advanced,             4795857
 silesia,                            uncompressed literals,              old streaming advanced,             4842075
-silesia,                            uncompressed literals optimal,      old streaming advanced,             4267082
+silesia,                            uncompressed literals optimal,      old streaming advanced,             4266582
 silesia,                            huffman literals,                   old streaming advanced,             6172207
 silesia,                            multithreaded with advanced params, old streaming advanced,             4842075
 silesia.tar,                        level -5,                           old streaming advanced,             6856523
@@ -1322,8 +1322,8 @@ silesia.tar,                        level 6,                            old stre
 silesia.tar,                        level 7,                            old streaming advanced,             4579823
 silesia.tar,                        level 9,                            old streaming advanced,             4555445
 silesia.tar,                        level 13,                           old streaming advanced,             4502956
-silesia.tar,                        level 16,                           old streaming advanced,             4360529
-silesia.tar,                        level 19,                           old streaming advanced,             4261475
+silesia.tar,                        level 16,                           old streaming advanced,             4360385
+silesia.tar,                        level 19,                           old streaming advanced,             4260939
 silesia.tar,                        no source size,                     old streaming advanced,             4859267
 silesia.tar,                        long distance mode,                 old streaming advanced,             4859271
 silesia.tar,                        multithreaded,                      old streaming advanced,             4859271
@@ -1333,7 +1333,7 @@ silesia.tar,                        small hash log,                     old stre
 silesia.tar,                        small chain log,                    old streaming advanced,             4917021
 silesia.tar,                        explicit params,                    old streaming advanced,             4807288
 silesia.tar,                        uncompressed literals,              old streaming advanced,             4859271
-silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4261475
+silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4260939
 silesia.tar,                        huffman literals,                   old streaming advanced,             6179056
 silesia.tar,                        multithreaded with advanced params, old streaming advanced,             4859271
 github,                             level -5,                           old streaming advanced,             213265
@@ -1361,9 +1361,9 @@ github,                             level 9 with dict,                  old stre
 github,                             level 13,                           old streaming advanced,             138676
 github,                             level 13 with dict,                 old streaming advanced,             39725
 github,                             level 16,                           old streaming advanced,             138575
-github,                             level 16 with dict,                 old streaming advanced,             40789
+github,                             level 16 with dict,                 old streaming advanced,             40804
 github,                             level 19,                           old streaming advanced,             132879
-github,                             level 19 with dict,                 old streaming advanced,             37576
+github,                             level 19 with dict,                 old streaming advanced,             37916
 github,                             no source size,                     old streaming advanced,             140599
 github,                             no source size with dict,           old streaming advanced,             40608
 github,                             long distance mode,                 old streaming advanced,             141104
@@ -1403,8 +1403,8 @@ github.tar,                         level 13,                           old stre
 github.tar,                         level 13 with dict,                 old streaming advanced,             35807
 github.tar,                         level 16,                           old streaming advanced,             40466
 github.tar,                         level 16 with dict,                 old streaming advanced,             38578
-github.tar,                         level 19,                           old streaming advanced,             32276
-github.tar,                         level 19 with dict,                 old streaming advanced,             32704
+github.tar,                         level 19,                           old streaming advanced,             32262
+github.tar,                         level 19 with dict,                 old streaming advanced,             32678
 github.tar,                         no source size,                     old streaming advanced,             38828
 github.tar,                         no source size with dict,           old streaming advanced,             38015
 github.tar,                         long distance mode,                 old streaming advanced,             38831
@@ -1415,7 +1415,7 @@ github.tar,                         small hash log,                     old stre
 github.tar,                         small chain log,                    old streaming advanced,             41669
 github.tar,                         explicit params,                    old streaming advanced,             41385
 github.tar,                         uncompressed literals,              old streaming advanced,             38831
-github.tar,                         uncompressed literals optimal,      old streaming advanced,             32276
+github.tar,                         uncompressed literals optimal,      old streaming advanced,             32262
 github.tar,                         huffman literals,                   old streaming advanced,             42560
 github.tar,                         multithreaded with advanced params, old streaming advanced,             38831
 github,                             level -5 with dict,                 old streaming cdict,                45832
@@ -1430,8 +1430,8 @@ github,                             level 6 with dict,                  old stre
 github,                             level 7 with dict,                  old streaming cdict,                38765
 github,                             level 9 with dict,                  old streaming cdict,                39439
 github,                             level 13 with dict,                 old streaming cdict,                39900
-github,                             level 16 with dict,                 old streaming cdict,                37577
-github,                             level 19 with dict,                 old streaming cdict,                37576
+github,                             level 16 with dict,                 old streaming cdict,                37902
+github,                             level 19 with dict,                 old streaming cdict,                37916
 github,                             no source size with dict,           old streaming cdict,                40654
 github.tar,                         level -5 with dict,                 old streaming cdict,                51286
 github.tar,                         level -3 with dict,                 old streaming cdict,                45147
@@ -1446,7 +1446,7 @@ github.tar,                         level 7 with dict,                  old stre
 github.tar,                         level 9 with dict,                  old streaming cdict,                36322
 github.tar,                         level 13 with dict,                 old streaming cdict,                36010
 github.tar,                         level 16 with dict,                 old streaming cdict,                39081
-github.tar,                         level 19 with dict,                 old streaming cdict,                32479
+github.tar,                         level 19 with dict,                 old streaming cdict,                32428
 github.tar,                         no source size with dict,           old streaming cdict,                38000
 github,                             level -5 with dict,                 old streaming advanced cdict,       46708
 github,                             level -3 with dict,                 old streaming advanced cdict,       45476
@@ -1460,8 +1460,8 @@ github,                             level 6 with dict,                  old stre
 github,                             level 7 with dict,                  old streaming advanced cdict,       38875
 github,                             level 9 with dict,                  old streaming advanced cdict,       38941
 github,                             level 13 with dict,                 old streaming advanced cdict,       39725
-github,                             level 16 with dict,                 old streaming advanced cdict,       40789
-github,                             level 19 with dict,                 old streaming advanced cdict,       37576
+github,                             level 16 with dict,                 old streaming advanced cdict,       40804
+github,                             level 19 with dict,                 old streaming advanced cdict,       37916
 github,                             no source size with dict,           old streaming advanced cdict,       40608
 github.tar,                         level -5 with dict,                 old streaming advanced cdict,       50791
 github.tar,                         level -3 with dict,                 old streaming advanced cdict,       44926
@@ -1476,5 +1476,5 @@ github.tar,                         level 7 with dict,                  old stre
 github.tar,                         level 9 with dict,                  old streaming advanced cdict,       36241
 github.tar,                         level 13 with dict,                 old streaming advanced cdict,       35807
 github.tar,                         level 16 with dict,                 old streaming advanced cdict,       38578
-github.tar,                         level 19 with dict,                 old streaming advanced cdict,       32704
+github.tar,                         level 19 with dict,                 old streaming advanced cdict,       32678
 github.tar,                         no source size with dict,           old streaming advanced cdict,       38015

From b88c593d8ff79b96390308380604f232802e0f04 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 5 Feb 2024 18:32:25 -0800
Subject: [PATCH 176/283] added or updated code comments

as suggested by @terrelln,
to make the code of the optimal parser a bit more understandable.
---
 lib/compress/zstd_compress_internal.h |  2 +-
 lib/compress/zstd_opt.c               | 22 ++++++++++++++--------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index dae8526d461..087ea49dcf8 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -162,7 +162,7 @@ typedef struct {
     int price;  /* price from beginning of segment to this position */
     U32 off;    /* offset of previous match */
     U32 mlen;   /* length of previous match */
-    U32 litlen; /* nb of literals after previous match */
+    U32 litlen; /* nb of literals since previous match */
     U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
 } ZSTD_optimal_t;
 
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 25715eabba8..8e1be1cec18 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1129,13 +1129,20 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 continue;
             }
 
+            /* Match found: let's store this solution, and eventually find more candidates.
+             * During this forward pass, @opt is used to store stretches,
+             * defined as "a match followed by N literals".
+             * Note how this is different from a Sequence, which is "N literals followed by a match".
+             * Storing stretches allows us to store different match predecessors
+             * for each literal position part of a literals run. */
+
             /* initialize opt[0] */
             opt[0].mlen = 0;  /* there are only literals so far */
             opt[0].litlen = litlen;
-            /* No need to include the actual price of the literals before the segment
+            /* No need to include the actual price of the literals before the first match
              * because it is static for the duration of the forward pass, and is included
-             * in every subsequent price. We include the literal length as the cost variation
-             * of litlen depends on the value of litlen.
+             * in every subsequent price. But, we include the literal length because
+             * the cost variation of litlen depends on the value of litlen.
              */
             opt[0].price = LL_PRICE(litlen);
             ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
@@ -1353,11 +1360,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             cur -= lastStretch.litlen;
         }
 
-        /* let's write the shortest path solution
-         * solution is stored in @opt,
-         * in reverse order,
-         * starting from @storeEnd (==cur+1)
-         * (effectively partially overwriting @opt).
+        /* Let's write the shortest path solution.
+         * It is stored in @opt in reverse order,
+         * starting from @storeEnd (==cur+2),
+         * effectively partially overwriting @opt.
          * Content is changed too:
          * - So far, @opt stored stretches, aka a match followed by literals
          * - Now, it will store sequences, aka literals followed by a match

From 1f87c88ecf3814ef59fa514dd7fe3522d2d400b1 Mon Sep 17 00:00:00 2001
From: sergeyandreenko 
Date: Tue, 6 Feb 2024 12:07:11 -0800
Subject: [PATCH 177/283] [Zstd] Less verbose log for patch mode.

---
 programs/fileio.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/programs/fileio.c b/programs/fileio.c
index 81d343023e3..b0d95f63ad6 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1096,15 +1096,15 @@ static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
     comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
     if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
         if (!prefs->ldmFlag)
-            DISPLAYLEVEL(1, "long mode automatically triggered\n");
+            DISPLAYLEVEL(2, "long mode automatically triggered\n");
         FIO_setLdmFlag(prefs, 1);
     }
     if (cParams.strategy >= ZSTD_btopt) {
-        DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
-        DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n");
-        DISPLAYLEVEL(1, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
-        DISPLAYLEVEL(1, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
-        DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n");
+        DISPLAYLEVEL(3, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
+        DISPLAYLEVEL(3, "- Use --single-thread mode in the zstd cli\n");
+        DISPLAYLEVEL(3, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
+        DISPLAYLEVEL(3, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
+        DISPLAYLEVEL(3, "Also consider playing around with searchLog and hashLog\n");
     }
 }
 

From 22574d848df09616d07fe26b363700525cb9cce9 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 6 Feb 2024 12:15:54 -0800
Subject: [PATCH 178/283] fix issue 5921623844651008

ossfuzz managed to create a scenario which triggers an `assert`.
This fixes it, by giving +1 more space for the backward search pass.
---
 lib/compress/zstd_compress_internal.h | 2 +-
 lib/compress/zstd_opt.c               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 087ea49dcf8..e41d7b78ec6 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -168,7 +168,7 @@ typedef struct {
 
 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
 
-#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+2)
+#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
 typedef struct {
     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
     unsigned* litFreq;           /* table of literals statistics, of size 256 */
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 8e1be1cec18..0449204ddd8 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1196,7 +1196,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         /* check further positions */
         for (cur = 1; cur <= last_pos; cur++) {
             const BYTE* const inr = ip + cur;
-            assert(cur < ZSTD_OPT_NUM);
+            assert(cur <= ZSTD_OPT_NUM);
             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
 
             /* Fix current position with one literal if cheaper */

From 3a64c69eba2592ec1cbcbed294a84019ab47dd19 Mon Sep 17 00:00:00 2001
From: Josh Kearney 
Date: Thu, 1 Feb 2024 11:21:16 -0600
Subject: [PATCH 179/283] Convert the CircleCI workflow to a GitHub Actions
 workflow

---
 .github/workflows/commit.yml  | 88 +++++++++++++++++++++++++++++++++++
 .github/workflows/nightly.yml | 64 +++++++++++++++++++++++++
 2 files changed, 152 insertions(+)
 create mode 100644 .github/workflows/commit.yml
 create mode 100644 .github/workflows/nightly.yml

diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml
new file mode 100644
index 00000000000..83a6412b932
--- /dev/null
+++ b/.github/workflows/commit.yml
@@ -0,0 +1,88 @@
+name: facebook/zstd/commit  # checks run on every push to dev (converted from CircleCI)
+on:
+  push:
+    branches:
+    - dev
+jobs:
+  short-tests-0:  # license check, then the native build/test make targets listed below
+    runs-on: ubuntu-latest
+    services:  # NOTE(review): steps run on the runner, not inside this image - confirm intent
+      docker:
+        image: fbopensource/zstd-circleci-primary:0.0.1
+        options: --entrypoint /bin/bash
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install libcurl4-gnutls-dev
+    - name: Test
+      run: |
+        ./tests/test-license.py
+        cc -v
+        CFLAGS="-O0 -Werror -pedantic" make allmost; make clean
+        make c99build; make clean
+        make c11build; make clean
+        make -j regressiontest; make clean
+        make shortest; make clean
+        make cxxtest; make clean
+  short-tests-1:  # gnu90/gnu99 builds, ppc/ppc64/arm/aarch64 cross builds, extra test targets
+    runs-on: ubuntu-latest
+    services:  # NOTE(review): steps run on the runner, not inside this image - confirm intent
+      docker:
+        image: fbopensource/zstd-circleci-primary:0.0.1
+        options: --entrypoint /bin/bash
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install gcc-powerpc-linux-gnu gcc-arm-linux-gnueabi gcc-aarch64-linux-gnu libc6-dev-ppc64-powerpc-cross libcurl4-gnutls-dev lib64gcc-11-dev-powerpc-cross
+    - name: Test
+      run: |
+        make gnu90build; make clean
+        make gnu99build; make clean
+        make ppc64build V=1; make clean
+        make ppcbuild   V=1; make clean
+        make armbuild   V=1; make clean
+        make aarch64build V=1; make clean
+        make -C tests test-legacy test-longmatch; make clean
+        make -C lib libzstd-nomt; make clean
+  regression-test:  # regenerates results.csv and fails if it differs from the committed copy
+    runs-on: ubuntu-latest
+    services:  # NOTE(review): steps run on the runner, not inside this image - confirm intent
+      docker:
+        image: fbopensource/zstd-circleci-primary:0.0.1
+        options: --entrypoint /bin/bash
+    env:
+      CIRCLE_ARTIFACTS: "/tmp/circleci-artifacts"  # name kept from CircleCI; used by the steps below
+    steps:
+    - uses: actions/checkout@v4
+    - name: restore_cache
+      uses: actions/cache@v3
+      with:
+        key: regression-cache-${{ hashFiles('tests/regression/data.c') }}-v0  # keyed on data.c content
+        path: tests/regression/cache
+        restore-keys: regression-cache-${{ hashFiles('tests/regression/data.c') }}-v0
+    - name: Install Dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install libcurl4-gnutls-dev
+    - name: Regression Test
+      run: |
+        make -C programs zstd
+        make -C tests/regression test
+        mkdir -p $CIRCLE_ARTIFACTS
+        ./tests/regression/test                     \
+            --cache  tests/regression/cache         \
+            --output $CIRCLE_ARTIFACTS/results.csv  \
+            --zstd   programs/zstd
+        echo "NOTE: The new results.csv is uploaded as an artifact of this job."
+        echo "      If the diff below fails, download it from the Artifacts section of this run"
+        echo "      and, if the new results are still good, copy it into the repo and commit it."
+        echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv"
+        diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv
+    - uses: actions/upload-artifact@v4
+      if: always()  # still upload results.csv when the diff step above fails
+      with:
+        path: "/tmp/circleci-artifacts"
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
new file mode 100644
index 00000000000..0eb9ecbde67
--- /dev/null
+++ b/.github/workflows/nightly.yml
@@ -0,0 +1,64 @@
+name: facebook/zstd/nightly  # daily cron run plus pushes to release/dev/master
+on:
+  schedule:
+  - cron: '0 0 * * *'
+  push:
+    branches:
+    - release
+    - dev
+    - master
+jobs:
+  regression-test:  # regenerates results.csv and fails if it differs from the committed copy
+    runs-on: ubuntu-latest
+    services:  # NOTE(review): steps run on the runner, not inside this image - confirm intent
+      docker:
+        image: fbopensource/zstd-circleci-primary:0.0.1
+        options: --entrypoint /bin/bash
+    env:
+      CIRCLE_ARTIFACTS: "/tmp/circleci-artifacts"  # name kept from CircleCI; used by the steps below
+    steps:
+    - uses: actions/checkout@v4
+    - uses: actions/cache@v3
+      with:
+        key: regression-cache-${{ hashFiles('tests/regression/data.c') }}-v0  # keyed on data.c content
+        path: tests/regression/cache
+        restore-keys: regression-cache-${{ hashFiles('tests/regression/data.c') }}-v0
+    - name: Install Dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install libcurl4-gnutls-dev
+    - name: Regression Test
+      run: |
+        make -C programs zstd
+        make -C tests/regression test
+        mkdir -p $CIRCLE_ARTIFACTS
+        ./tests/regression/test                     \
+            --cache  tests/regression/cache         \
+            --output $CIRCLE_ARTIFACTS/results.csv  \
+            --zstd   programs/zstd
+        echo "NOTE: The new results.csv is uploaded as an artifact of this job."
+        echo "      If the diff below fails, download it from the Artifacts section of this run"
+        echo "      and, if the new results are still good, copy it into the repo and commit it."
+        echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv"
+        diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv
+    # Must come after the test (which creates the directory) and run even when the diff fails.
+    - uses: actions/upload-artifact@v4
+      if: always()
+      with:
+        path: "/tmp/circleci-artifacts"
+
+# Longer tests (commented out; presumably not yet ported from CircleCI - TODO confirm)
+  #- make -C tests test-zstd-nolegacy && make clean
+  #- pyenv global 3.4.4; make -C tests versionsTest && make clean
+  #- make zlibwrapper         && make clean
+  #- gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
+  #- make uasan               && make clean
+  #- make asan32              && make clean
+  #- make -C tests test32 CC=clang MOREFLAGS="-g -fsanitize=address -I/usr/include/x86_64-linux-gnu"
+# Valgrind tests
+  #- CFLAGS="-O1 -g" make -C zlibWrapper valgrindTest && make clean
+  #- make -C tests valgrindTest && make clean
+# ARM, AArch64, PowerPC, PowerPC64 tests
+  #- make ppctest             && make clean
+  #- make ppc64test           && make clean
+  #- make armtest             && make clean
+  #- make aarch64test         && make clean

From 695d154cac251c4ae2e2a438af21f0455a4c4149 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 8 Feb 2024 16:14:14 -0800
Subject: [PATCH 180/283] fuzz: control debuglevel from Makefile

and make the compilation faster
---
 tests/fuzz/Makefile | 2 +-
 tests/fuzz/fuzz.py  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 525e396bca4..554bd10d561 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -257,7 +257,7 @@ corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS))
 seedcorpora: $(patsubst %,corpora/%_seed_corpus.zip,$(FUZZ_TARGETS))
 
 regressiontest: corpora
-	CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
+	CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all --debug=$(DEBUGLEVEL)
 	$(PYTHON) ./fuzz.py regression all
 
 clean:
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index 058fca8137b..c489b8fa646 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -492,6 +492,7 @@ def build(args):
     subprocess.check_call(clean_cmd)
     build_cmd = [
         'make',
+        '-j',
         cc_str,
         cxx_str,
         cppflags_str,

From b0e8580dc7f71881361f3a6fe46841af9d70bedf Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 8 Feb 2024 16:38:20 -0800
Subject: [PATCH 181/283] fix fuzz issue 5131069967892480

---
 lib/compress/zstd_opt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 0449204ddd8..c2ec2a9edcd 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1216,6 +1216,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     if ( (optLevel >= 1) /* additional check only for higher modes */
                       && (prevMatch.litlen == 0) /* replace a match */
                       && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
+                      && LIKELY(ip + cur < iend)
                     ) {
                         /* check next position, in case it would be cheaper */
                         int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);

From b921f1aad67cfc347ea7f8ef1c0afb6688bad4b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20Gr=C3=BCninger?= 
Date: Sun, 11 Feb 2024 11:22:05 +0100
Subject: [PATCH 182/283] Reduce scope of variables

This improves readability, keeps variables local, and
prevents the unintended use (e.g. typo) later on.
Found by Cppcheck (variableScope)
---
 lib/compress/huf_compress.c    | 16 +++++++++-------
 lib/compress/zstd_compress.c   |  6 ++----
 lib/compress/zstd_ldm.c        |  2 +-
 lib/compress/zstd_opt.c        |  3 +--
 lib/compress/zstdmt_compress.c |  4 ++--
 programs/benchfn.c             |  4 ++--
 programs/fileio.c              | 10 +++++-----
 programs/util.c                | 10 +++++-----
 8 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 1097d13d87b..ea000723209 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -1062,12 +1062,12 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
     const BYTE* ip = (const BYTE*) src;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart;
     HUF_CStream_t bitC;
 
     /* init */
     if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
+    { BYTE* op = ostart;
+      size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
       if (HUF_isError(initErr)) return 0; }
 
     if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
@@ -1288,7 +1288,7 @@ unsigned HUF_optimalTableLog(
 
     {   BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
         size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
-        size_t maxBits, hSize, newSize;
+        size_t hSize, newSize;
         const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
         const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
         size_t optSize = ((size_t) ~0) - 1;
@@ -1299,12 +1299,14 @@ unsigned HUF_optimalTableLog(
         /* Search until size increases */
         for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
             DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
-            maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
-            if (ERR_isError(maxBits)) continue;
 
-            if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
+            {   size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
+                if (ERR_isError(maxBits)) continue;
+
+                if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
 
-            hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+                hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+            }
 
             if (ERR_isError(hSize)) continue;
 
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f8abbbbd91b..451f2f91e6f 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2136,7 +2136,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
                 &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                 buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
-        int resizeWorkspace;
 
         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
 
@@ -2145,7 +2144,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         {   /* Check if workspace is large enough, alloc a new one if needed */
             int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
             int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
-            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
+            int resizeWorkspace = workspaceTooSmall || workspaceWasteful;
             DEBUGLOG(4, "Need %zu B workspace", neededSpace);
             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
@@ -5176,14 +5175,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
 {
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
-    size_t fhSize = 0;
 
     DEBUGLOG(4, "ZSTD_writeEpilogue");
     RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
 
     /* special case : empty frame */
     if (cctx->stage == ZSTDcs_init) {
-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+        size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
         FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
         dstCapacity -= fhSize;
         op += fhSize;
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 7a0792ee458..17c069fe1d7 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -695,7 +695,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
         /* maybeSplitSequence updates rawSeqStore->pos */
         rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                    (U32)(iend - ip), minMatch);
-        int i;
         /* End signal */
         if (sequence.offset == 0)
             break;
@@ -708,6 +707,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
         /* Run the block compressor */
         DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
         {
+            int i;
             size_t const newLitLength =
                 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
             ip += sequence.litLength;
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index c2ec2a9edcd..e63073e5a4f 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1372,7 +1372,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         {   U32 const storeEnd = cur + 2;
             U32 storeStart = storeEnd;
             U32 stretchPos = cur;
-            ZSTD_optimal_t nextStretch;
 
             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                         last_pos, cur); (void)last_pos;
@@ -1390,7 +1389,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 storeStart = storeEnd;
             }
             while (1) {
-                nextStretch = opt[stretchPos];
+                ZSTD_optimal_t nextStretch = opt[stretchPos];
                 opt[storeStart].litlen = nextStretch.litlen;
                 DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
                             opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index baa0f006977..e86fdb2bafd 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -105,10 +105,10 @@ typedef struct ZSTDMT_bufferPool_s {
 
 static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
 {
-    unsigned u;
     DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
     if (!bufPool) return;   /* compatibility with free on NULL */
     if (bufPool->buffers) {
+        unsigned u;
         for (u=0; u<bufPool->totalBuffers; u++) {
             DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
             ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
@@ -364,10 +364,10 @@ typedef struct {
 /* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
-    int cid;
     if (!pool) return;
     ZSTD_pthread_mutex_destroy(&pool->poolMutex);
     if (pool->cctxs) {
+        int cid;
         for (cid=0; cid<pool->totalCCtx; cid++)
             ZSTD_freeCCtx(pool->cctxs[cid]);  /* free compatible with NULL */
         ZSTD_customFree(pool->cctxs, pool->cMem);
diff --git a/programs/benchfn.c b/programs/benchfn.c
index 8e6726f8dc6..3e042cf38f8 100644
--- a/programs/benchfn.c
+++ b/programs/benchfn.c
@@ -108,7 +108,6 @@ static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
 BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
                                    unsigned nbLoops)
 {
-    size_t dstSize = 0;
     nbLoops += !nbLoops;   /* minimum nbLoops is 1 */
 
     /* init */
@@ -118,7 +117,8 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
     }   }
 
     /* benchmark */
-    {   UTIL_time_t const clockStart = UTIL_getTime();
+    {   size_t dstSize = 0;
+        UTIL_time_t const clockStart = UTIL_getTime();
         unsigned loopNb, blockNb;
         if (p.initFn != NULL) p.initFn(p.initPayload);
         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
diff --git a/programs/fileio.c b/programs/fileio.c
index b0d95f63ad6..a7597d893d1 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1839,7 +1839,6 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
     int closeDstFile = 0;
     int result;
     int transferStat = 0;
-    FILE *dstFile;
     int dstFd = -1;
 
     assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
@@ -1854,10 +1853,11 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
 
         closeDstFile = 1;
         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
-        dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
-        if (dstFile==NULL) return 1;  /* could not open dstFileName */
-        dstFd = fileno(dstFile);
-        AIO_WritePool_setFile(ress.writeCtx, dstFile);
+        {   FILE *dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
+            if (dstFile==NULL) return 1;  /* could not open dstFileName */
+            dstFd = fileno(dstFile);
+            AIO_WritePool_setFile(ress.writeCtx, dstFile);
+        }
         /* Must only be added after FIO_openDstFile() succeeds.
          * Otherwise we may delete the destination file if it already exists,
          * and the user presses Ctrl-C when asked if they wish to overwrite.
diff --git a/programs/util.c b/programs/util.c
index c9031e91d35..862fc80080d 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -660,7 +660,6 @@ UTIL_createFileNamesTable_fromFileName(const char* inputFileName)
     size_t nbFiles = 0;
     char* buf;
     size_t bufSize;
-    size_t pos = 0;
     stat_t statbuf;
 
     if (!UTIL_stat(inputFileName, &statbuf) || !UTIL_isRegularFileStat(&statbuf))
@@ -687,12 +686,13 @@ UTIL_createFileNamesTable_fromFileName(const char* inputFileName)
     {   const char** filenamesTable = (const char**) malloc(nbFiles * sizeof(*filenamesTable));
         CONTROL(filenamesTable != NULL);
 
-        {   size_t fnb;
-            for (fnb = 0, pos = 0; fnb < nbFiles; fnb++) {
+        {   size_t fnb, pos = 0;
+            for (fnb = 0; fnb < nbFiles; fnb++) {
                 filenamesTable[fnb] = buf+pos;
                 pos += strlen(buf+pos)+1;  /* +1 for the finishing `\0` */
-        }   }
+            }
         assert(pos <= bufSize);
+        }
 
         return UTIL_assembleFileNamesTable(filenamesTable, nbFiles, buf);
     }
@@ -753,7 +753,7 @@ void UTIL_refFilename(FileNamesTable* fnt, const char* filename)
 
 static size_t getTotalTableSize(FileNamesTable* table)
 {
-    size_t fnb = 0, totalSize = 0;
+    size_t fnb, totalSize = 0;
     for(fnb = 0 ; fnb < table->tableSize && table->fileNames[fnb] ; ++fnb) {
         totalSize += strlen(table->fileNames[fnb]) + 1; /* +1 to add '\0' at the end of each fileName */
     }

From 927d0799442c42ece088dcf339ca25968274f5a0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 12 Feb 2024 05:17:13 +0000
Subject: [PATCH 183/283] Bump actions/upload-artifact from 4.3.0 to 4.3.1

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.0 to 4.3.1.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/26f96dfa697d77e81fd5907df203aa23a56210a8...5d5d22a31266ced268874388b861e4b58bb5c2f3)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 .github/workflows/dev-long-tests.yml    | 2 +-
 .github/workflows/scorecards.yml        | 2 +-
 .github/workflows/windows-artifacts.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index 29db7316ea3..aed68bb938e 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -290,7 +290,7 @@ jobs:
         dry-run: false
         sanitizer: ${{ matrix.sanitizer }}
     - name: Upload Crash
-      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # tag=v4.3.0
+      uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
       if: failure() && steps.build.outcome == 'success'
       with:
         name: ${{ matrix.sanitizer }}-artifacts
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 770f9d4a382..0ccd87625bc 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -51,7 +51,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # tag=v4.3.0
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
         with:
           name: SARIF file
           path: results.sarif
diff --git a/.github/workflows/windows-artifacts.yml b/.github/workflows/windows-artifacts.yml
index b689bf560a7..52bc90a4926 100644
--- a/.github/workflows/windows-artifacts.yml
+++ b/.github/workflows/windows-artifacts.yml
@@ -52,7 +52,7 @@ jobs:
         cd ..
 
     - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip
-      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # tag=v4.3.0
+      uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
       with:
         path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
         name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip

From b6805c54d67f902d32afecc5ca153cd81a77764f Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" 
Date: Tue, 13 Feb 2024 11:50:55 -0500
Subject: [PATCH 184/283] Add SECURITY.md File

This just adds a copy of the Meta default SECURITY.md that we can then modify.
---
 SECURITY.md | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 SECURITY.md

diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000000..4e5f09cbe1a
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,7 @@
+# Reporting and Fixing Security Issues
+
+Please do not open GitHub issues or pull requests - this makes the problem immediately visible to everyone, including malicious actors. Security issues in this open source project can be safely reported via the Meta Bug Bounty program:
+
+https://www.facebook.com/whitehat
+
+Meta's security team will triage your report and determine whether or not is it eligible for a bounty under our program.

From e13d099bf881d69d6cf8bcd5cd4f677e1ce86bea Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" 
Date: Tue, 13 Feb 2024 11:51:37 -0500
Subject: [PATCH 185/283] Advertise Availability of Security Vulnerability
 Notifications

---
 SECURITY.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/SECURITY.md b/SECURITY.md
index 4e5f09cbe1a..a5f9a7e1fdb 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -5,3 +5,11 @@ Please do not open GitHub issues or pull requests - this makes the problem immed
 https://www.facebook.com/whitehat
 
 Meta's security team will triage your report and determine whether or not is it eligible for a bounty under our program.
+
+# Receiving Vulnerability Notifications
+
+In the case that a significant security vulnerability is reported to us or discovered by us---without being publicly known---we will, at our discretion, notify high-profile, high-exposure users of Zstandard ahead of our public disclosure of the issue and associated fix.
+
+If you believe your project would benefit from inclusion in this list, please reach out to one of the maintainers.
+
+

From 1e046ce7fa6ebabb48a182009df6e4fe90fa2740 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 00:12:32 -0800
Subject: [PATCH 186/283] increase vocabulary size

makes compression a bit less good,
hence a bit more comparable with real text (though still too easy to compress).
level 6 is now stronger than level 4, by a hair.
However, there is still a ratio dip at level 5.
---
 .gitignore       |   7 +-
 programs/lorem.c | 259 +++++++++++++++++++++++++----------------------
 2 files changed, 142 insertions(+), 124 deletions(-)

diff --git a/.gitignore b/.gitignore
index 048a4c4e492..34e18b44395 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,12 +39,15 @@ buck-out/
 build-*
 *.gcda
 
+# IDE
+.clang_complete
+compile_flags.txt
+.clang-format
+
 # Other files
 .directory
 _codelite/
 _zstdbench/
-.clang_complete
-compile_flags.txt
 *.idea
 *.swp
 .DS_Store
diff --git a/programs/lorem.c b/programs/lorem.c
index 59dd6da629f..cf452ca8b4b 100644
--- a/programs/lorem.c
+++ b/programs/lorem.c
@@ -8,7 +8,6 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
-
 /* Implementation notes:
  *
  * This is a very simple lorem ipsum generator
@@ -32,83 +31,97 @@
  */
 
 #include "lorem.h"
-#include <string.h>  /* memcpy */
-#include <limits.h>  /* INT_MAX */
 #include <assert.h>
+#include <limits.h> /* INT_MAX */
+#include <string.h> /* memcpy */
 
 #define WORD_MAX_SIZE 20
 
 /* Define the word pool */
-static const char *words[] = {
-    "lorem",       "ipsum",      "dolor",      "sit",          "amet",
-    "consectetur", "adipiscing", "elit",       "sed",          "do",
-    "eiusmod",     "tempor",     "incididunt", "ut",           "labore",
-    "et",          "dolore",     "magna",      "aliqua",       "dis",
-    "lectus",      "vestibulum", "mattis",     "ullamcorper",  "velit",
-    "commodo",     "a",          "lacus",      "arcu",         "magnis",
-    "parturient",  "montes",     "nascetur",   "ridiculus",    "mus",
-    "mauris",      "nulla",      "malesuada",  "pellentesque", "eget",
-    "gravida",     "in",         "dictum",     "non",          "erat",
-    "nam",         "voluptat",   "maecenas",   "blandit",      "aliquam",
-    "etiam",       "enim",       "lobortis",   "scelerisque",  "fermentum",
-    "dui",         "faucibus",   "ornare",     "at",           "elementum",
-    "eu",          "facilisis",  "odio",       "morbi",        "quis",
-    "eros",        "donec",      "ac",         "orci",         "purus",
-    "turpis",      "cursus",     "leo",        "vel",          "porta"};
-
-/* simple distribution that favors small words :
+static const char* words[] = {
+    "lorem",        "ipsum",      "dolor",       "sit",          "amet",
+    "consectetur",  "adipiscing", "elit",        "sed",          "do",
+    "eiusmod",      "tempor",     "incididunt",  "ut",           "labore",
+    "et",           "dolore",     "magna",       "aliqua",       "dis",
+    "lectus",       "vestibulum", "mattis",      "ullamcorper",  "velit",
+    "commodo",      "a",          "lacus",       "arcu",         "magnis",
+    "parturient",   "montes",     "nascetur",    "ridiculus",    "mus",
+    "mauris",       "nulla",      "malesuada",   "pellentesque", "eget",
+    "gravida",      "in",         "dictum",      "non",          "erat",
+    "nam",          "voluptat",   "maecenas",    "blandit",      "aliquam",
+    "etiam",        "enim",       "lobortis",    "scelerisque",  "fermentum",
+    "dui",          "faucibus",   "ornare",      "at",           "elementum",
+    "eu",           "facilisis",  "odio",        "morbi",        "quis",
+    "eros",         "donec",      "ac",          "orci",         "purus",
+    "turpis",       "cursus",     "leo",         "vel",          "porta",
+    "consequat",    "interdum",   "varius",      "vulputate",    "aliquet",
+    "pharetra",     "nunc",       "auctor",      "urna",         "id",
+    "metus",        "viverra",    "nibh",        "cras",         "mi",
+    "unde",         "omnis",      "iste",        "natus",        "error",
+    "perspiciatis", "voluptatem", "accusantium", "doloremque",   "laudantium",
+    "totam",        "rem",        "aperiam",     "eaque",        "ipsa",
+    "quae",         "ab",         "illo",        "inventore",    "veritatis",
+    "quasi",        "architecto", "beatae",      "vitae",        "dicta",
+    "sunt",         "explicabo",  "nemo",        "ipsam",        "quia",
+    "voluptas",     "aspernatur", "aut",         "odit",         "fugit"
+};
+
+/* simple 1-dimension distribution that favors small words :
  * 1 letter : weight 3
  * 2-3 letters : weight 2
  * 4+ letters : weight 1
- * This is expected to be a bit more difficult to compress */
+ */
 static const int distrib[] = {
-    0, 1, 2, 3, 3, 4, 5, 6, 7, 8,
-    8,9, 9, 10, 11, 12, 13, 13, 14, 15,
-    15, 16, 17, 18, 19, 19, 20, 21, 22, 23,
-    24, 25, 26, 26, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 34, 35, 36, 37, 38, 39, 40,
-    41, 41, 42, 43, 43, 44, 45, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 55, 56,
-    57, 58, 58, 59, 60, 60, 61, 62, 63, 64,
-    65, 66, 67, 67, 68, 69, 70, 71, 72, 72,
-    73, 73, 74 };
+    0,   1,   2,   3,   3,   4,   5,   6,   7,   8,   8,   9,   9,   10,  11,
+    12,  13,  13,  14,  15,  15,  16,  17,  18,  19,  19,  20,  21,  22,  23,
+    24,  25,  26,  26,  26,  27,  28,  29,  30,  31,  32,  33,  34,  34,  35,
+    36,  37,  38,  39,  40,  41,  41,  42,  43,  43,  44,  45,  45,  46,  47,
+    48,  49,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  58,  59,  60,
+    60,  61,  62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  72,
+    73,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  84,  85,
+    86,  87,  88,  89,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,
+    100, 101, 101, 102, 103, 104, 105, 106, 106, 107, 108, 109, 110, 111, 112,
+    113, 114, 115, 116, 117, 118, 119, 129, 121, 122, 123, 124,
+};
 static const unsigned distribCount = sizeof(distrib) / sizeof(distrib[0]);
 
 /* Note: this unit only works when invoked sequentially.
  * No concurrent access is allowed */
-static char *g_ptr = NULL;
-static size_t g_nbChars = 0;
-static size_t g_maxChars = 10000000;
+static char* g_ptr         = NULL;
+static size_t g_nbChars    = 0;
+static size_t g_maxChars   = 10000000;
 static unsigned g_randRoot = 0;
 
 #define RDG_rotl32(x, r) ((x << r) | (x >> (32 - r)))
-static unsigned LOREM_rand(unsigned range) {
-  static const unsigned prime1 = 2654435761U;
-  static const unsigned prime2 = 2246822519U;
-  unsigned rand32 = g_randRoot;
-  rand32 *= prime1;
-  rand32 ^= prime2;
-  rand32 = RDG_rotl32(rand32, 13);
-  g_randRoot = rand32;
-  return (unsigned)(((unsigned long long)rand32 * range) >> 32);
+static unsigned LOREM_rand(unsigned range)
+{
+    static const unsigned prime1 = 2654435761U;
+    static const unsigned prime2 = 2246822519U;
+    unsigned rand32              = g_randRoot;
+    rand32 *= prime1;
+    rand32 ^= prime2;
+    rand32     = RDG_rotl32(rand32, 13);
+    g_randRoot = rand32;
+    return (unsigned)(((unsigned long long)rand32 * range) >> 32);
 }
 
-static void writeLastCharacters(void) {
-  size_t lastChars = g_maxChars - g_nbChars;
-  assert(g_maxChars >= g_nbChars);
-  if (lastChars == 0)
-    return;
-  g_ptr[g_nbChars++] = '.';
-  if (lastChars > 2) {
-    memset(g_ptr + g_nbChars, ' ', lastChars - 2);
-  }
-  if (lastChars > 1) {
-    g_ptr[g_maxChars-1] = '\n';
-  }
-  g_nbChars = g_maxChars;
+static void writeLastCharacters(void)
+{
+    size_t lastChars = g_maxChars - g_nbChars;
+    assert(g_maxChars >= g_nbChars);
+    if (lastChars == 0)
+        return;
+    g_ptr[g_nbChars++] = '.';
+    if (lastChars > 2) {
+        memset(g_ptr + g_nbChars, ' ', lastChars - 2);
+    }
+    if (lastChars > 1) {
+        g_ptr[g_maxChars - 1] = '\n';
+    }
+    g_nbChars = g_maxChars;
 }
 
-static void generateWord(const char *word, const char *separator, int upCase)
+static void generateWord(const char* word, const char* separator, int upCase)
 {
     size_t const len = strlen(word) + strlen(separator);
     if (g_nbChars + len > g_maxChars) {
@@ -118,90 +131,92 @@ static void generateWord(const char *word, const char *separator, int upCase)
     memcpy(g_ptr + g_nbChars, word, strlen(word));
     if (upCase) {
         static const char toUp = 'A' - 'a';
-        g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);
+        g_ptr[g_nbChars]       = (char)(g_ptr[g_nbChars] + toUp);
     }
     g_nbChars += strlen(word);
     memcpy(g_ptr + g_nbChars, separator, strlen(separator));
     g_nbChars += strlen(separator);
 }
 
-static int about(unsigned target) {
-  return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);
+static int about(unsigned target)
+{
+    return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);
 }
 
 /* Function to generate a random sentence */
-static void generateSentence(int nbWords) {
-  int commaPos = about(9);
-  int comma2 = commaPos + about(7);
-  int i;
-  for (i = 0; i < nbWords; i++) {
-    int const wordID = distrib[LOREM_rand(distribCount)];
-    const char *const word = words[wordID];
-    const char* sep = " ";
-    if (i == commaPos)
-      sep = ", ";
-    if (i == comma2)
-      sep = ", ";
-    if (i == nbWords - 1)
-      sep = ". ";
-    generateWord(word, sep, i==0);
-  }
+static void generateSentence(int nbWords)
+{
+    int commaPos = about(9);
+    int comma2   = commaPos + about(7);
+    int i;
+    for (i = 0; i < nbWords; i++) {
+        int const wordID       = distrib[LOREM_rand(distribCount)];
+        const char* const word = words[wordID];
+        const char* sep        = " ";
+        if (i == commaPos)
+            sep = ", ";
+        if (i == comma2)
+            sep = ", ";
+        if (i == nbWords - 1)
+            sep = ". ";
+        generateWord(word, sep, i == 0);
+    }
 }
 
-static void generateParagraph(int nbSentences) {
-  int i;
-  for (i = 0; i < nbSentences; i++) {
-    int wordsPerSentence = about(8);
-    generateSentence(wordsPerSentence);
-  }
-  if (g_nbChars < g_maxChars) {
-    g_ptr[g_nbChars++] = '\n';
-  }
-  if (g_nbChars < g_maxChars) {
-    g_ptr[g_nbChars++] = '\n';
-  }
+static void generateParagraph(int nbSentences)
+{
+    int i;
+    for (i = 0; i < nbSentences; i++) {
+        int wordsPerSentence = about(8);
+        generateSentence(wordsPerSentence);
+    }
+    if (g_nbChars < g_maxChars) {
+        g_ptr[g_nbChars++] = '\n';
+    }
+    if (g_nbChars < g_maxChars) {
+        g_ptr[g_nbChars++] = '\n';
+    }
 }
 
 /* It's "common" for lorem ipsum generators to start with the same first
  * pre-defined sentence */
-static void generateFirstSentence(void) {
-  int i;
-  for (i = 0; i < 18; i++) {
-    const char *word = words[i];
-    const char *separator = " ";
-    if (i == 4)
-      separator = ", ";
-    if (i == 7)
-      separator = ", ";
-    generateWord(word, separator, i==0);
-  }
-  generateWord(words[18], ". ", 0);
+static void generateFirstSentence(void)
+{
+    int i;
+    for (i = 0; i < 18; i++) {
+        const char* word      = words[i];
+        const char* separator = " ";
+        if (i == 4)
+            separator = ", ";
+        if (i == 7)
+            separator = ", ";
+        generateWord(word, separator, i == 0);
+    }
+    generateWord(words[18], ". ", 0);
 }
 
-size_t LOREM_genBlock(void* buffer, size_t size,
-                      unsigned seed,
-                      int first, int fill)
+size_t
+LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
 {
-  g_ptr = (char*)buffer;
-  assert(size < INT_MAX);
-  g_maxChars = size;
-  g_nbChars = 0;
-  g_randRoot = seed;
-  if (first) {
-    generateFirstSentence();
-  }
-  while (g_nbChars < g_maxChars) {
-    int sentencePerParagraph = about(7);
-    generateParagraph(sentencePerParagraph);
-    if (!fill)
-      break; /* only generate one paragraph in not-fill mode */
-  }
-  g_ptr = NULL;
-  return g_nbChars;
+    g_ptr = (char*)buffer;
+    assert(size < INT_MAX);
+    g_maxChars = size;
+    g_nbChars  = 0;
+    g_randRoot = seed;
+    if (first) {
+        generateFirstSentence();
+    }
+    while (g_nbChars < g_maxChars) {
+        int sentencePerParagraph = about(7);
+        generateParagraph(sentencePerParagraph);
+        if (!fill)
+            break; /* only generate one paragraph in not-fill mode */
+    }
+    g_ptr = NULL;
+    return g_nbChars;
 }
 
 void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
 {
-  LOREM_genBlock(buffer, size, seed, 1, 1);
+    LOREM_genBlock(buffer, size, seed, 1, 1);
 }
-

From 40874d4aea44bc9e1efd2ce14b98ea19d1d2e42d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 00:30:29 -0800
Subject: [PATCH 187/283] enriched vocabulary again

using real latin sentences from Cicero.

Compression ratio lower again, closer to "real" text,

now level 6 is way better than level 4.

level 5 is still lower than level 4,
but at least it's now higher than level 3.
---
 programs/lorem.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/programs/lorem.c b/programs/lorem.c
index cf452ca8b4b..5f0aba41afc 100644
--- a/programs/lorem.c
+++ b/programs/lorem.c
@@ -63,7 +63,17 @@ static const char* words[] = {
     "quae",         "ab",         "illo",        "inventore",    "veritatis",
     "quasi",        "architecto", "beatae",      "vitae",        "dicta",
     "sunt",         "explicabo",  "nemo",        "ipsam",        "quia",
-    "voluptas",     "aspernatur", "aut",         "odit",         "fugit"
+    "voluptas",     "aspernatur", "aut",         "odit",         "fugit",
+    "consequuntur", "magni",      "dolores",     "eos",          "qui",
+    "ratione",      "sequi",      "nesciunt",    "neque",        "porro",
+    "quisquam",     "est",        "dolorem",     "adipisci",     "numquam",
+    "eius",         "modi",       "tempora",     "incidunt",     "magnam",
+    "quaerat",      "ad",         "minima",      "veniam",       "nostrum",
+    "ullam",        "corporis",   "suscipit",    "laboriosam",   "nisi",
+    "aliquid",      "ex",         "ea",          "commodi",      "consequatur",
+    "autem",        "eum",        "iure",        "voluptate",    "esse",
+    "quam",         "nihil",      "molestiae",   "illum",        "fugiat",
+    "quo",          "pariatur"
 };
 
 /* simple 1-dimension distribution that favors small words :
@@ -81,7 +91,11 @@ static const int distrib[] = {
     73,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  84,  85,
     86,  87,  88,  89,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,
     100, 101, 101, 102, 103, 104, 105, 106, 106, 107, 108, 109, 110, 111, 112,
-    113, 114, 115, 116, 117, 118, 119, 129, 121, 122, 123, 124,
+    113, 114, 115, 116, 117, 118, 119, 129, 121, 122, 123, 124, 125, 126, 127,
+    128, 128, 129, 129, 130, 131, 132, 133, 134, 135, 136, 136, 137, 138, 139,
+    140, 141, 142, 143, 144, 145, 146, 146, 147, 148, 149, 150, 151, 152, 153,
+    154, 155, 156, 156, 157, 157, 158, 159, 160, 161, 161, 162, 163, 164, 165,
+    166, 167, 168, 169, 170, 170, 171,
 };
 static const unsigned distribCount = sizeof(distrib) / sizeof(distrib[0]);
 

From 5a1bb4a4e0aaba722e57cdca46486bc3c6d7e457 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 00:37:21 -0800
Subject: [PATCH 188/283] add question marks

and (slightly) longer sentences.
---
 programs/lorem.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/programs/lorem.c b/programs/lorem.c
index 5f0aba41afc..8f2596e79b9 100644
--- a/programs/lorem.c
+++ b/programs/lorem.c
@@ -160,8 +160,10 @@ static int about(unsigned target)
 /* Function to generate a random sentence */
 static void generateSentence(int nbWords)
 {
-    int commaPos = about(9);
-    int comma2   = commaPos + about(7);
+    int commaPos       = about(9);
+    int comma2         = commaPos + about(7);
+    int qmark          = (LOREM_rand(11) == 7);
+    const char* endSep = qmark ? "? " : ". ";
     int i;
     for (i = 0; i < nbWords; i++) {
         int const wordID       = distrib[LOREM_rand(distribCount)];
@@ -172,7 +174,7 @@ static void generateSentence(int nbWords)
         if (i == comma2)
             sep = ", ";
         if (i == nbWords - 1)
-            sep = ". ";
+            sep = endSep;
         generateWord(word, sep, i == 0);
     }
 }
@@ -181,7 +183,7 @@ static void generateParagraph(int nbSentences)
 {
     int i;
     for (i = 0; i < nbSentences; i++) {
-        int wordsPerSentence = about(8);
+        int wordsPerSentence = about(10);
         generateSentence(wordsPerSentence);
     }
     if (g_nbChars < g_maxChars) {

From 3dbd861b7dc05bc4291f9de222e397e50fb4c32b Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 12:26:37 -0800
Subject: [PATCH 189/283] runtime weight distribution table

and made small words a bit more common.
---
 programs/lorem.c | 83 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 57 insertions(+), 26 deletions(-)

diff --git a/programs/lorem.c b/programs/lorem.c
index 8f2596e79b9..49408b5a1e3 100644
--- a/programs/lorem.c
+++ b/programs/lorem.c
@@ -38,7 +38,7 @@
 #define WORD_MAX_SIZE 20
 
 /* Define the word pool */
-static const char* words[] = {
+static const char* kWords[] = {
     "lorem",        "ipsum",      "dolor",       "sit",          "amet",
     "consectetur",  "adipiscing", "elit",        "sed",          "do",
     "eiusmod",      "tempor",     "incididunt",  "ut",           "labore",
@@ -75,29 +75,56 @@ static const char* words[] = {
     "quam",         "nihil",      "molestiae",   "illum",        "fugiat",
     "quo",          "pariatur"
 };
+static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
 
-/* simple 1-dimension distribution that favors small words :
- * 1 letter : weight 3
- * 2-3 letters : weight 2
- * 4+ letters : weight 1
+/* simple 1-dimension distribution, based on word's length, favors small words
  */
-static const int distrib[] = {
-    0,   1,   2,   3,   3,   4,   5,   6,   7,   8,   8,   9,   9,   10,  11,
-    12,  13,  13,  14,  15,  15,  16,  17,  18,  19,  19,  20,  21,  22,  23,
-    24,  25,  26,  26,  26,  27,  28,  29,  30,  31,  32,  33,  34,  34,  35,
-    36,  37,  38,  39,  40,  41,  41,  42,  43,  43,  44,  45,  45,  46,  47,
-    48,  49,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  58,  59,  60,
-    60,  61,  62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  72,
-    73,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  84,  85,
-    86,  87,  88,  89,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,
-    100, 101, 101, 102, 103, 104, 105, 106, 106, 107, 108, 109, 110, 111, 112,
-    113, 114, 115, 116, 117, 118, 119, 129, 121, 122, 123, 124, 125, 126, 127,
-    128, 128, 129, 129, 130, 131, 132, 133, 134, 135, 136, 136, 137, 138, 139,
-    140, 141, 142, 143, 144, 145, 146, 146, 147, 148, 149, 150, 151, 152, 153,
-    154, 155, 156, 156, 157, 157, 158, 159, 160, 161, 161, 162, 163, 164, 165,
-    166, 167, 168, 169, 170, 170, 171,
-};
-static const unsigned distribCount = sizeof(distrib) / sizeof(distrib[0]);
+static const int kWeights[]      = { 0, 8, 6, 4, 3, 2 };
+static const unsigned kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
+
+#define DISTRIB_SIZE_MAX 500
+static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
+static unsigned g_distribCount         = 0;
+
+static void countFreqs(
+        const char* words[],
+        size_t nbWords,
+        const int* weights,
+        unsigned long nbWeights)
+{
+    unsigned total = 0;
+    size_t w;
+    for (w = 0; w < nbWords; w++) {
+        unsigned long len = strlen(words[w]);
+        int lmax;
+        if (len >= nbWeights)
+            len = nbWeights - 1;
+        lmax = weights[len];
+        total += (unsigned)lmax;
+    }
+    g_distribCount = total;
+    assert(g_distribCount <= DISTRIB_SIZE_MAX);
+}
+
+static void init_word_distrib(
+        const char* words[],
+        size_t nbWords,
+        const int* weights,
+        unsigned long nbWeights)
+{
+    size_t w, d = 0;
+    countFreqs(words, nbWords, weights, nbWeights);
+    for (w = 0; w < nbWords; w++) {
+        unsigned long len = strlen(words[w]);
+        int l, lmax;
+        if (len >= nbWeights)
+            len = nbWeights - 1;
+        lmax = weights[len];
+        for (l = 0; l < lmax; l++) {
+            g_distrib[d++] = (int)w;
+        }
+    }
+}
 
 /* Note: this unit only works when invoked sequentially.
  * No concurrent access is allowed */
@@ -166,8 +193,8 @@ static void generateSentence(int nbWords)
     const char* endSep = qmark ? "? " : ". ";
     int i;
     for (i = 0; i < nbWords; i++) {
-        int const wordID       = distrib[LOREM_rand(distribCount)];
-        const char* const word = words[wordID];
+        int const wordID       = g_distrib[LOREM_rand(g_distribCount)];
+        const char* const word = kWords[wordID];
         const char* sep        = " ";
         if (i == commaPos)
             sep = ", ";
@@ -200,7 +227,7 @@ static void generateFirstSentence(void)
 {
     int i;
     for (i = 0; i < 18; i++) {
-        const char* word      = words[i];
+        const char* word      = kWords[i];
         const char* separator = " ";
         if (i == 4)
             separator = ", ";
@@ -208,7 +235,7 @@ static void generateFirstSentence(void)
             separator = ", ";
         generateWord(word, separator, i == 0);
     }
-    generateWord(words[18], ". ", 0);
+    generateWord(kWords[18], ". ", 0);
 }
 
 size_t
@@ -219,6 +246,10 @@ LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
     g_maxChars = size;
     g_nbChars  = 0;
     g_randRoot = seed;
+    if (g_distribCount == 0) {
+        init_word_distrib(kWords, kNbWords, kWeights, kNbWeights);
+    }
+
     if (first) {
         generateFirstSentence();
     }

From 7003c9905e0c80aafe00ef485e586f859707c04c Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 13:27:36 -0800
Subject: [PATCH 190/283] increase word dictionary

for higher variety of messages.
Now, level 5 compresses better than level 4 (by a hair).
---
 programs/lorem.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/programs/lorem.c b/programs/lorem.c
index 49408b5a1e3..2fb977b07d7 100644
--- a/programs/lorem.c
+++ b/programs/lorem.c
@@ -73,7 +73,23 @@ static const char* kWords[] = {
     "aliquid",      "ex",         "ea",          "commodi",      "consequatur",
     "autem",        "eum",        "iure",        "voluptate",    "esse",
     "quam",         "nihil",      "molestiae",   "illum",        "fugiat",
-    "quo",          "pariatur"
+    "quo",          "pariatur",   "vero",        "accusamus",    "iusto",
+    "dignissimos",  "ducimus",    "blanditiis",  "praesentium",  "voluptatum",
+    "deleniti",     "atque",      "corrupti",    "quos",         "quas",
+    "molestias",    "excepturi",  "sint",        "occaecati",    "cupiditate",
+    "provident",    "similique",  "culpa",       "officia",      "deserunt",
+    "mollitia",     "animi",      "laborum",     "dolorum",      "fuga",
+    "harum",        "quidem",     "rerum",       "facilis",      "expedita",
+    "distinctio",   "libero",     "tempore",     "cum",          "soluta",
+    "nobis",        "eligendi",   "optio",       "cumque",       "impedit",
+    "minus",        "quod",       "maxime",      "placeat",      "facere",
+    "possimus",     "assumenda",  "repellendus", "temporibus",   "quibusdam",
+    "officiis",     "debitis",    "saepe",       "eveniet",      "voluptates",
+    "repudiandae",  "recusandae", "itaque",      "earum",        "hic",
+    "tenetur",      "sapiente",   "delectus",    "reiciendis",   "cillum",
+    "maiores",      "alias",      "perferendis", "doloribus",    "asperiores",
+    "repellat",     "minim",      "nostrud",     "exercitation", "ullamco",
+    "laboris",      "aliquip",    "duis",        "aute",         "irure",
 };
 static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
 
@@ -82,7 +98,7 @@ static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
 static const int kWeights[]      = { 0, 8, 6, 4, 3, 2 };
 static const unsigned kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
 
-#define DISTRIB_SIZE_MAX 500
+#define DISTRIB_SIZE_MAX 650
 static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
 static unsigned g_distribCount         = 0;
 
@@ -210,7 +226,7 @@ static void generateParagraph(int nbSentences)
 {
     int i;
     for (i = 0; i < nbSentences; i++) {
-        int wordsPerSentence = about(10);
+        int wordsPerSentence = about(11);
         generateSentence(wordsPerSentence);
     }
     if (g_nbChars < g_maxChars) {

From 83598aa106ba0edaa8b449b2fe5d63773eeebc4e Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 15:24:25 -0800
Subject: [PATCH 191/283] datagen generates lorem ipsum by default

---
 programs/lorem.c   |  10 +--
 tests/Makefile     |   2 +-
 tests/datagencli.c | 190 +++++++++++++++++++++++++--------------------
 tests/loremOut.c   |  50 ++++++++++++
 tests/loremOut.h   |  15 ++++
 5 files changed, 176 insertions(+), 91 deletions(-)
 create mode 100644 tests/loremOut.c
 create mode 100644 tests/loremOut.h

diff --git a/programs/lorem.c b/programs/lorem.c
index 2fb977b07d7..56e229058f9 100644
--- a/programs/lorem.c
+++ b/programs/lorem.c
@@ -23,11 +23,11 @@
  * and lacks a regularity more representative of text.
  *
  * The compression ratio achievable on the generated lorem ipsum
- * is still a bit too good, presumably because the dictionary is too small.
- * It would be possible to create some more complex scheme,
- * notably by enlarging the dictionary with a word generator,
- * and adding grammatical rules (composition) and syntax rules.
- * But that's probably overkill for the intended goal.
+ * is still a bit too good, presumably because the dictionary is a bit too
+ * small. It would be possible to create some more complex scheme, notably by
+ * enlarging the dictionary with a word generator, and adding grammatical rules
+ * (composition) and syntax rules. But that's probably overkill for the intended
+ * goal.
  */
 
 #include "lorem.h"
diff --git a/tests/Makefile b/tests/Makefile
index 35be1039b4e..ed7638b743f 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -206,7 +206,7 @@ paramgrill : LDLIBS += -lm
 paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c $(PRGDIR)/lorem.c paramgrill.c
 
 CLEAN += datagen
-datagen : $(PRGDIR)/datagen.c datagencli.c
+datagen : $(PRGDIR)/datagen.c $(PRGDIR)/lorem.c loremOut.c datagencli.c
 	$(LINK.c) $^ -o $@$(EXT)
 
 CLEAN += roundTripCrash
diff --git a/tests/datagencli.c b/tests/datagencli.c
index 09ec5e9ae36..b3020995385 100644
--- a/tests/datagencli.c
+++ b/tests/datagencli.c
@@ -8,122 +8,142 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
-
 /*-************************************
-*  Dependencies
-**************************************/
-#include "util.h"      /* Compiler options */
-#include <stdio.h>     /* fprintf, stderr */
-#include "datagen.h"   /* RDG_generate */
-
+ *  Dependencies
+ **************************************/
+#include <stdio.h>    /* fprintf, stderr */
+#include "datagen.h"  /* RDG_generate */
+#include "loremOut.h" /* LOREM_genOut */
+#include "util.h"     /* Compiler options */
 
 /*-************************************
-*  Constants
-**************************************/
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
+ *  Constants
+ **************************************/
+#define KB *(1 << 10)
+#define MB *(1 << 20)
+#define GB *(1U << 30)
 
 #define SIZE_DEFAULT ((64 KB) + 1)
 #define SEED_DEFAULT 0
-#define COMPRESSIBILITY_DEFAULT 50
-
+#define COMPRESSIBILITY_DEFAULT 9999
 
 /*-************************************
-*  Macros
-**************************************/
-#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+ *  Macros
+ **************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...)  \
+    if (displayLevel >= l) {  \
+        DISPLAY(__VA_ARGS__); \
+    }
 static unsigned displayLevel = 2;
 
-
 /*-*******************************************************
-*  Command line
-*********************************************************/
+ *  Command line
+ *********************************************************/
 static int usage(const char* programName)
 {
-    DISPLAY( "Compressible data generator\n");
-    DISPLAY( "Usage :\n");
-    DISPLAY( "      %s [args]\n", programName);
-    DISPLAY( "\n");
-    DISPLAY( "Arguments :\n");
-    DISPLAY( " -g#    : generate # data (default:%i)\n", SIZE_DEFAULT);
-    DISPLAY( " -s#    : Select seed (default:%i)\n", SEED_DEFAULT);
-    DISPLAY( " -P#    : Select compressibility in %% (default:%i%%)\n",
-                        COMPRESSIBILITY_DEFAULT);
-    DISPLAY( " -h     : display help and exit\n");
+    DISPLAY("Compressible data generator\n");
+    DISPLAY("Usage :\n");
+    DISPLAY("      %s [args]\n", programName);
+    DISPLAY("\n");
+    DISPLAY("Arguments :\n");
+    DISPLAY(" -g#    : generate # data (default:%i)\n", SIZE_DEFAULT);
+    DISPLAY(" -s#    : Select seed (default:%i)\n", SEED_DEFAULT);
+    DISPLAY(" -P#    : Select compressibility in %% (range [0-100])\n");
+    DISPLAY(" -h     : display help and exit\n");
     return 0;
 }
 
-
 int main(int argc, const char** argv)
 {
-    unsigned probaU32 = COMPRESSIBILITY_DEFAULT;
-    double litProba = 0.0;
-    U64 size = SIZE_DEFAULT;
-    U32 seed = SEED_DEFAULT;
+    unsigned probaU32             = COMPRESSIBILITY_DEFAULT;
+    double litProba               = 0.0;
+    U64 size                      = SIZE_DEFAULT;
+    U32 seed                      = SEED_DEFAULT;
     const char* const programName = argv[0];
 
     int argNb;
-    for(argNb=1; argNb='0') && (*argument<='9'))
-                        size *= 10, size += *argument++ - '0';
-                    if (*argument=='K') { size <<= 10; argument++; }
-                    if (*argument=='M') { size <<= 20; argument++; }
-                    if (*argument=='G') { size <<= 30; argument++; }
-                    if (*argument=='B') { argument++; }
-                    break;
-                case 's':
-                    argument++;
-                    seed=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                        seed *= 10, seed += *argument++ - '0';
-                    break;
-                case 'P':
-                    argument++;
-                    probaU32 = 0;
-                    while ((*argument>='0') && (*argument<='9'))
-                        probaU32 *= 10, probaU32 += *argument++ - '0';
-                    if (probaU32>100) probaU32 = 100;
-                    break;
-                case 'L':   /* hidden argument : Literal distribution probability */
-                    argument++;
-                    litProba=0.;
-                    while ((*argument>='0') && (*argument<='9'))
-                        litProba *= 10, litProba += *argument++ - '0';
-                    if (litProba>100.) litProba=100.;
-                    litProba /= 100.;
-                    break;
-                case 'v':
-                    displayLevel = 4;
-                    argument++;
-                    break;
-                default:
-                    return usage(programName);
+            while (*argument != 0) {
+                switch (*argument) {
+                    case 'h':
+                        return usage(programName);
+                    case 'g':
+                        argument++;
+                        size = 0;
+                        while ((*argument >= '0') && (*argument <= '9'))
+                            size *= 10, size += (U64)(*argument++ - '0');
+                        if (*argument == 'K') {
+                            size <<= 10;
+                            argument++;
+                        }
+                        if (*argument == 'M') {
+                            size <<= 20;
+                            argument++;
+                        }
+                        if (*argument == 'G') {
+                            size <<= 30;
+                            argument++;
+                        }
+                        if (*argument == 'B') {
+                            argument++;
+                        }
+                        break;
+                    case 's':
+                        argument++;
+                        seed = 0;
+                        while ((*argument >= '0') && (*argument <= '9'))
+                            seed *= 10, seed += (U32)(*argument++ - '0');
+                        break;
+                    case 'P':
+                        argument++;
+                        probaU32 = 0;
+                        while ((*argument >= '0') && (*argument <= '9'))
+                            probaU32 *= 10,
+                                    probaU32 += (U32)(*argument++ - '0');
+                        if (probaU32 > 100)
+                            probaU32 = 100;
+                        break;
+                    case 'L': /* hidden argument : Literal distribution
+                                 probability */
+                        argument++;
+                        litProba = 0.;
+                        while ((*argument >= '0') && (*argument <= '9'))
+                            litProba *= 10, litProba += *argument++ - '0';
+                        if (litProba > 100.)
+                            litProba = 100.;
+                        litProba /= 100.;
+                        break;
+                    case 'v':
+                        displayLevel = 4;
+                        argument++;
+                        break;
+                    default:
+                        return usage(programName);
                 }
-    }   }   }   /* for(argNb=1; argNb 4 GB).
+ * Note that, beyond 1 paragraph, this generator produces
+ * a different content than LOREM_genBuffer (even when using same seed).
+ */
+
+#include "loremOut.h"
+#include <assert.h>
+#include <stdio.h>
+#include "lorem.h"    /* LOREM_genBlock */
+#include "platform.h" /* Compiler options, SET_BINARY_MODE */
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define LOREM_BLOCKSIZE (1 << 10)
+void LOREM_genOut(unsigned long long size, unsigned seed)
+{
+    char buff[LOREM_BLOCKSIZE] = { 0 };
+    unsigned long long total   = 0;
+    size_t genBlockSize        = (size_t)MIN(size, LOREM_BLOCKSIZE);
+
+    /* init */
+    SET_BINARY_MODE(stdout);
+
+    /* Generate Ipsum text, one paragraph at a time */
+    while (total < size) {
+        size_t generated =
+                LOREM_genBlock(buff, genBlockSize, seed++, total == 0, 0);
+        assert(generated <= genBlockSize);
+        total += generated;
+        assert(total <= size);
+        fwrite(buff,
+               1,
+               generated,
+               stdout); /* note: should check potential write error */
+        if (size - total < genBlockSize)
+            genBlockSize = (size_t)(size - total);
+    }
+    assert(total == size);
+}
diff --git a/tests/loremOut.h b/tests/loremOut.h
new file mode 100644
index 00000000000..3a32e11613d
--- /dev/null
+++ b/tests/loremOut.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* LOREM_genOut():
+ * Generate @size bytes of compressible data using lorem ipsum generator into
+ * stdout.
+ */
+void LOREM_genOut(unsigned long long size, unsigned seed);

From 7a225c0c465149f1a72811dab669985b6ea5e5f4 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 15:47:09 -0800
Subject: [PATCH 192/283] internal benchmark: can select size of generated
 synthetic sample

---
 programs/benchzstd.c | 1125 ++++++++++++++++++++++++++----------------
 programs/lorem.c     |   12 +-
 2 files changed, 712 insertions(+), 425 deletions(-)

diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index b3af4c3322f..56af23d33e7 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -8,198 +8,230 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
-
 /* **************************************
-*  Tuning parameters
-****************************************/
-#ifndef BMK_TIMETEST_DEFAULT_S   /* default minimum time per test */
-# define BMK_TIMETEST_DEFAULT_S 3
+ *  Tuning parameters
+ ****************************************/
+#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */
+#    define BMK_TIMETEST_DEFAULT_S 3
 #endif
 
-
 /* *************************************
-*  Includes
-***************************************/
-#include "platform.h"    /* Large Files support */
-#include "util.h"        /* UTIL_getFileSize, UTIL_sleep */
-#include <stdlib.h>      /* malloc, free */
-#include <string.h>      /* memset, strerror */
-#include <stdio.h>       /* fprintf, fopen */
+ *  Includes
+ ***************************************/
+#include <assert.h> /* assert */
 #include 
-#include <assert.h>      /* assert */
+#include <stdio.h>    /* fprintf, fopen */
+#include <stdlib.h>   /* malloc, free */
+#include <string.h>   /* memset, strerror */
+#include "platform.h" /* Large Files support */
+#include "util.h"     /* UTIL_getFileSize, UTIL_sleep */
 
-#include "timefn.h"      /* UTIL_time_t */
-#include "benchfn.h"
 #include "../lib/common/mem.h"
+#include "benchfn.h"
+#include "timefn.h" /* UTIL_time_t */
 #ifndef ZSTD_STATIC_LINKING_ONLY
-# define ZSTD_STATIC_LINKING_ONLY
+#    define ZSTD_STATIC_LINKING_ONLY
 #endif
 #include "../lib/zstd.h"
-#include "datagen.h"     /* RDG_genBuffer */
-#include "lorem.h"       /* LOREM_genBuffer */
+#include "datagen.h" /* RDG_genBuffer */
+#include "lorem.h"   /* LOREM_genBuffer */
 #ifndef XXH_INLINE_ALL
-# define XXH_INLINE_ALL
+#    define XXH_INLINE_ALL
 #endif
 #include "../lib/common/xxhash.h"
-#include "benchzstd.h"
 #include "../lib/zstd_errors.h"
-
+#include "benchzstd.h"
 
 /* *************************************
-*  Constants
-***************************************/
+ *  Constants
+ ***************************************/
 #ifndef ZSTD_GIT_COMMIT
-#  define ZSTD_GIT_COMMIT_STRING ""
+#    define ZSTD_GIT_COMMIT_STRING ""
 #else
-#  define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
+#    define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
 #endif
 
-#define TIMELOOP_MICROSEC     (1*1000000ULL) /* 1 second */
-#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
-#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
-#define COOLPERIOD_SEC        10
+#define TIMELOOP_MICROSEC (1 * 1000000ULL)             /* 1 second */
+#define TIMELOOP_NANOSEC (1 * 1000000000ULL)           /* 1 second */
+#define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC) /* 70 seconds */
+#define COOLPERIOD_SEC 10
 
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
+#define KB *(1 << 10)
+#define MB *(1 << 20)
+#define GB *(1U << 30)
 
 #define BMK_RUNTEST_DEFAULT_MS 1000
 
-static const size_t maxMemory = (sizeof(size_t)==4)  ?
-                    /* 32-bit */ (2 GB - 64 MB) :
-                    /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31));
-
+static const size_t maxMemory = (sizeof(size_t) == 4)
+        ?
+        /* 32-bit */ (2 GB - 64 MB)
+        :
+        /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t) * 8) - 31));
 
 /* *************************************
-*  console display
-***************************************/
-#define DISPLAY(...)         { fprintf(stderr, __VA_ARGS__); fflush(NULL); }
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-/* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
-#define OUTPUT(...)          { fprintf(stdout, __VA_ARGS__); fflush(NULL); }
-#define OUTPUTLEVEL(l, ...)  if (displayLevel>=l) { OUTPUT(__VA_ARGS__); }
-
+ *  console display
+ ***************************************/
+#define DISPLAY(...)                  \
+    {                                 \
+        fprintf(stderr, __VA_ARGS__); \
+        fflush(NULL);                 \
+    }
+#define DISPLAYLEVEL(l, ...)  \
+    if (displayLevel >= l) {  \
+        DISPLAY(__VA_ARGS__); \
+    }
+/* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : +
+ * progression;   4 : + information */
+#define OUTPUT(...)                   \
+    {                                 \
+        fprintf(stdout, __VA_ARGS__); \
+        fflush(NULL);                 \
+    }
+#define OUTPUTLEVEL(l, ...)  \
+    if (displayLevel >= l) { \
+        OUTPUT(__VA_ARGS__); \
+    }
 
 /* *************************************
-*  Exceptions
-***************************************/
+ *  Exceptions
+ ***************************************/
 #ifndef DEBUG
-#  define DEBUG 0
+#    define DEBUG 0
 #endif
-#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
-
-#define RETURN_ERROR_INT(errorNum, ...)  {               \
-    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
-    DISPLAYLEVEL(1, "Error %i : ", errorNum);         \
-    DISPLAYLEVEL(1, __VA_ARGS__);                     \
-    DISPLAYLEVEL(1, " \n");                           \
-    return errorNum;                                  \
-}
+#define DEBUGOUTPUT(...)          \
+    {                             \
+        if (DEBUG)                \
+            DISPLAY(__VA_ARGS__); \
+    }
 
-#define CHECK_Z(zf) {              \
-    size_t const zerr = zf;        \
-    if (ZSTD_isError(zerr)) {      \
-        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);  \
-        DISPLAY("Error : ");       \
-        DISPLAY("%s failed : %s",  \
-                #zf, ZSTD_getErrorName(zerr));   \
-        DISPLAY(" \n");            \
-        exit(1);                   \
-    }                              \
-}
+#define RETURN_ERROR_INT(errorNum, ...)                \
+    {                                                  \
+        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
+        DISPLAYLEVEL(1, "Error %i : ", errorNum);      \
+        DISPLAYLEVEL(1, __VA_ARGS__);                  \
+        DISPLAYLEVEL(1, " \n");                        \
+        return errorNum;                               \
+    }
 
-#define RETURN_ERROR(errorNum, retType, ...)  {       \
-    retType r;                                        \
-    memset(&r, 0, sizeof(retType));                   \
-    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
-    DISPLAYLEVEL(1, "Error %i : ", errorNum);         \
-    DISPLAYLEVEL(1, __VA_ARGS__);                     \
-    DISPLAYLEVEL(1, " \n");                           \
-    r.tag = errorNum;                                 \
-    return r;                                         \
-}
+#define CHECK_Z(zf)                                                  \
+    {                                                                \
+        size_t const zerr = zf;                                      \
+        if (ZSTD_isError(zerr)) {                                    \
+            DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);           \
+            DISPLAY("Error : ");                                     \
+            DISPLAY("%s failed : %s", #zf, ZSTD_getErrorName(zerr)); \
+            DISPLAY(" \n");                                          \
+            exit(1);                                                 \
+        }                                                            \
+    }
 
+#define RETURN_ERROR(errorNum, retType, ...)           \
+    {                                                  \
+        retType r;                                     \
+        memset(&r, 0, sizeof(retType));                \
+        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
+        DISPLAYLEVEL(1, "Error %i : ", errorNum);      \
+        DISPLAYLEVEL(1, __VA_ARGS__);                  \
+        DISPLAYLEVEL(1, " \n");                        \
+        r.tag = errorNum;                              \
+        return r;                                      \
+    }
 
 /* *************************************
-*  Benchmark Parameters
-***************************************/
+ *  Benchmark Parameters
+ ***************************************/
 
-BMK_advancedParams_t BMK_initAdvancedParams(void) {
+BMK_advancedParams_t BMK_initAdvancedParams(void)
+{
     BMK_advancedParams_t const res = {
-        BMK_both, /* mode */
+        BMK_both,               /* mode */
         BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
-        0, /* blockSize */
-        0, /* nbWorkers */
-        0, /* realTime */
-        0, /* additionalParam */
-        0, /* ldmFlag */
-        0, /* ldmMinMatch */
-        0, /* ldmHashLog */
-        0, /* ldmBuckSizeLog */
-        0,  /* ldmHashRateLog */
-        ZSTD_ps_auto, /* literalCompressionMode */
-        0 /* useRowMatchFinder */
+        0,                      /* blockSize */
+        0,                      /* nbWorkers */
+        0,                      /* realTime */
+        0,                      /* additionalParam */
+        0,                      /* ldmFlag */
+        0,                      /* ldmMinMatch */
+        0,                      /* ldmHashLog */
+        0,                      /* ldmBuckSizeLog */
+        0,                      /* ldmHashRateLog */
+        ZSTD_ps_auto,           /* literalCompressionMode */
+        0                       /* useRowMatchFinder */
     };
     return res;
 }
 
-
 /* ********************************************************
-*  Bench functions
-**********************************************************/
+ *  Bench functions
+ **********************************************************/
 typedef struct {
     const void* srcPtr;
     size_t srcSize;
-    void*  cPtr;
+    void* cPtr;
     size_t cRoom;
     size_t cSize;
-    void*  resPtr;
+    void* resPtr;
     size_t resSize;
 } blockParam_t;
 
 #undef MIN
 #undef MAX
-#define MIN(a,b)    ((a) < (b) ? (a) : (b))
-#define MAX(a,b)    ((a) > (b) ? (a) : (b))
-
-static void
-BMK_initCCtx(ZSTD_CCtx* ctx,
-            const void* dictBuffer, size_t dictBufferSize,
-            int cLevel,
-            const ZSTD_compressionParameters* comprParams,
-            const BMK_advancedParams_t* adv)
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static void BMK_initCCtx(
+        ZSTD_CCtx* ctx,
+        const void* dictBuffer,
+        size_t dictBufferSize,
+        int cLevel,
+        const ZSTD_compressionParameters* comprParams,
+        const BMK_advancedParams_t* adv)
 {
     ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
-    if (adv->nbWorkers==1) {
+    if (adv->nbWorkers == 1) {
         CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
     } else {
         CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
     }
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, (int)comprParams->strategy));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx,
+            ZSTD_c_literalCompressionMode,
+            (int)adv->literalCompressionMode));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_strategy, (int)comprParams->strategy));
     CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
 }
 
-static void BMK_initDCtx(ZSTD_DCtx* dctx,
-    const void* dictBuffer, size_t dictBufferSize) {
+static void
+BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize)
+{
     CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
     CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
 }
 
-
 typedef struct {
     ZSTD_CCtx* cctx;
     const void* dictBuffer;
@@ -209,9 +241,16 @@ typedef struct {
     const BMK_advancedParams_t* adv;
 } BMK_initCCtxArgs;
 
-static size_t local_initCCtx(void* payload) {
+static size_t local_initCCtx(void* payload)
+{
     BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
-    BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv);
+    BMK_initCCtx(
+            ag->cctx,
+            ag->dictBuffer,
+            ag->dictBufferSize,
+            ag->cLevel,
+            ag->comprParams,
+            ag->adv);
     return 0;
 }
 
@@ -221,18 +260,20 @@ typedef struct {
     size_t dictBufferSize;
 } BMK_initDCtxArgs;
 
-static size_t local_initDCtx(void* payload) {
+static size_t local_initDCtx(void* payload)
+{
     BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
     BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
     return 0;
 }
 
-
 /* `addArgs` is the context */
 static size_t local_defaultCompress(
-                    const void* srcBuffer, size_t srcSize,
-                    void* dstBuffer, size_t dstSize,
-                    void* addArgs)
+        const void* srcBuffer,
+        size_t srcSize,
+        void* dstBuffer,
+        size_t dstSize,
+        void* addArgs)
 {
     ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
     return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
@@ -240,18 +281,24 @@ static size_t local_defaultCompress(
 
 /* `addArgs` is the context */
 static size_t local_defaultDecompress(
-                    const void* srcBuffer, size_t srcSize,
-                    void* dstBuffer, size_t dstCapacity,
-                    void* addArgs)
+        const void* srcBuffer,
+        size_t srcSize,
+        void* dstBuffer,
+        size_t dstCapacity,
+        void* addArgs)
 {
-    size_t moreToFlush = 1;
+    size_t moreToFlush    = 1;
     ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
     ZSTD_inBuffer in;
     ZSTD_outBuffer out;
-    in.src = srcBuffer; in.size = srcSize; in.pos = 0;
-    out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0;
+    in.src   = srcBuffer;
+    in.size  = srcSize;
+    in.pos   = 0;
+    out.dst  = dstBuffer;
+    out.size = dstCapacity;
+    out.pos  = 0;
     while (moreToFlush) {
-        if(out.pos == out.size) {
+        if (out.pos == out.size) {
             return (size_t)-ZSTD_error_dstSize_tooSmall;
         }
         moreToFlush = ZSTD_decompressStream(dctx, &out, &in);
@@ -260,10 +307,8 @@ static size_t local_defaultDecompress(
         }
     }
     return out.pos;
-
 }
 
-
 /* ================================================================= */
 /*      Benchmark Zstandard, mem-to-mem scenarios                    */
 /* ================================================================= */
@@ -287,104 +332,145 @@ static BMK_benchOutcome_t BMK_benchOutcome_error(void)
     return b;
 }
 
-static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result)
+static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(
+        BMK_benchResult_t result)
 {
     BMK_benchOutcome_t b;
-    b.tag = 0;
+    b.tag                         = 0;
     b.internal_never_use_directly = result;
     return b;
 }
 
-
 /* benchMem with no allocation */
-static BMK_benchOutcome_t
-BMK_benchMemAdvancedNoAlloc(
-                    const void** srcPtrs, size_t* srcSizes,
-                    void** cPtrs, size_t* cCapacities, size_t* cSizes,
-                    void** resPtrs, size_t* resSizes,
-                    void** resultBufferPtr, void* compressedBuffer,
-                    size_t maxCompressedSize,
-                    BMK_timedFnState_t* timeStateCompress,
-                    BMK_timedFnState_t* timeStateDecompress,
-
-                    const void* srcBuffer, size_t srcSize,
-                    const size_t* fileSizes, unsigned nbFiles,
-                    const int cLevel,
-                    const ZSTD_compressionParameters* comprParams,
-                    const void* dictBuffer, size_t dictBufferSize,
-                    ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
-                    int displayLevel, const char* displayName,
-                    const BMK_advancedParams_t* adv)
+static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
+        const void** srcPtrs,
+        size_t* srcSizes,
+        void** cPtrs,
+        size_t* cCapacities,
+        size_t* cSizes,
+        void** resPtrs,
+        size_t* resSizes,
+        void** resultBufferPtr,
+        void* compressedBuffer,
+        size_t maxCompressedSize,
+        BMK_timedFnState_t* timeStateCompress,
+        BMK_timedFnState_t* timeStateDecompress,
+
+        const void* srcBuffer,
+        size_t srcSize,
+        const size_t* fileSizes,
+        unsigned nbFiles,
+        const int cLevel,
+        const ZSTD_compressionParameters* comprParams,
+        const void* dictBuffer,
+        size_t dictBufferSize,
+        ZSTD_CCtx* cctx,
+        ZSTD_DCtx* dctx,
+        int displayLevel,
+        const char* displayName,
+        const BMK_advancedParams_t* adv)
 {
-    size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize);  /* avoid div by 0 */
+    size_t const blockSize =
+            ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
+                     ? adv->blockSize
+                     : srcSize)
+            + (!srcSize); /* avoid div by 0 */
     BMK_benchResult_t benchResult;
     size_t const loadedCompressedSize = srcSize;
-    size_t cSize = 0;
-    double ratio = 0.;
+    size_t cSize                      = 0;
+    double ratio                      = 0.;
     U32 nbBlocks;
 
-    assert(cctx != NULL); assert(dctx != NULL);
+    assert(cctx != NULL);
+    assert(dctx != NULL);
 
     /* init */
     memset(&benchResult, 0, sizeof(benchResult));
-    if (strlen(displayName)>17) displayName += strlen(displayName) - 17;   /* display last 17 characters */
+    if (strlen(displayName) > 17)
+        displayName +=
+                strlen(displayName) - 17; /* display last 17 characters */
     if (adv->mode == BMK_decodeOnly) {
         /* benchmark only decompression : source must be already compressed */
         const char* srcPtr = (const char*)srcBuffer;
-        U64 totalDSize64 = 0;
+        U64 totalDSize64   = 0;
         U32 fileNb;
-        for (fileNb=0; fileNb decodedSize) {  /* size_t overflow */
-                RETURN_ERROR(32, BMK_benchOutcome_t, "decompressed size is too large for local system");
+            if (totalDSize64 > decodedSize) { /* size_t overflow */
+                RETURN_ERROR(
+                        32,
+                        BMK_benchOutcome_t,
+                        "decompressed size is too large for local system");
             }
             *resultBufferPtr = malloc(decodedSize);
             if (!(*resultBufferPtr)) {
-                RETURN_ERROR(33, BMK_benchOutcome_t, "allocation error: not enough memory");
+                RETURN_ERROR(
+                        33,
+                        BMK_benchOutcome_t,
+                        "allocation error: not enough memory");
             }
-            cSize = srcSize;
+            cSize   = srcSize;
             srcSize = decodedSize;
-            ratio = (double)srcSize / (double)cSize;
+            ratio   = (double)srcSize / (double)cSize;
         }
     }
 
     /* Init data blocks  */
-    {   const char* srcPtr = (const char*)srcBuffer;
-        char* cPtr = (char*)compressedBuffer;
-        char* resPtr = (char*)(*resultBufferPtr);
+    {
+        const char* srcPtr = (const char*)srcBuffer;
+        char* cPtr         = (char*)compressedBuffer;
+        char* resPtr       = (char*)(*resultBufferPtr);
         U32 fileNb;
-        for (nbBlocks=0, fileNb=0; fileNbmode == BMK_decodeOnly) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize);
-            U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
-            for ( ; nbBlocksmode == BMK_decodeOnly)
+                    ? 1
+                    : (U32)((remaining + (blockSize - 1)) / blockSize);
+            U32 const blockEnd            = nbBlocks + nbBlocksforThisFile;
+            for (; nbBlocks < blockEnd; nbBlocks++) {
                 size_t const thisBlockSize = MIN(remaining, blockSize);
-                srcPtrs[nbBlocks] = srcPtr;
-                srcSizes[nbBlocks] = thisBlockSize;
-                cPtrs[nbBlocks] = cPtr;
-                cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize);
-                resPtrs[nbBlocks] = resPtr;
-                resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize;
+                srcPtrs[nbBlocks]          = srcPtr;
+                srcSizes[nbBlocks]         = thisBlockSize;
+                cPtrs[nbBlocks]            = cPtr;
+                cCapacities[nbBlocks]      = (adv->mode == BMK_decodeOnly)
+                             ? thisBlockSize
+                             : ZSTD_compressBound(thisBlockSize);
+                resPtrs[nbBlocks]          = resPtr;
+                resSizes[nbBlocks]         = (adv->mode == BMK_decodeOnly)
+                                ? (size_t)ZSTD_findDecompressedSize(
+                                srcPtr, thisBlockSize)
+                                : thisBlockSize;
                 srcPtr += thisBlockSize;
                 cPtr += cCapacities[nbBlocks];
                 resPtr += thisBlockSize;
                 remaining -= thisBlockSize;
                 if (adv->mode == BMK_decodeOnly) {
-                    cSizes[nbBlocks] = thisBlockSize;
+                    cSizes[nbBlocks]  = thisBlockSize;
                     benchResult.cSize = thisBlockSize;
-    }   }   }   }
+                }
+            }
+        }
+    }
 
     /* warming up `compressedBuffer` */
     if (adv->mode == BMK_decodeOnly) {
@@ -394,236 +480,329 @@ BMK_benchMemAdvancedNoAlloc(
     }
 
     if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) {
-        OUTPUTLEVEL(2, "Warning : time measurements may be incorrect in multithreading mode... \n")
+        OUTPUTLEVEL(
+                2,
+                "Warning : time measurements may be incorrect in multithreading mode... \n")
     }
 
     /* Bench */
-    {   U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0);
-#       define NB_MARKS 4
+    {
+        U64 const crcOrig = (adv->mode == BMK_decodeOnly)
+                ? 0
+                : XXH64(srcBuffer, srcSize, 0);
+#define NB_MARKS 4
         const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
-        U32 markNb = 0;
-        int compressionCompleted = (adv->mode == BMK_decodeOnly);
-        int decompressionCompleted = (adv->mode == BMK_compressOnly);
+        U32 markNb                  = 0;
+        int compressionCompleted    = (adv->mode == BMK_decodeOnly);
+        int decompressionCompleted  = (adv->mode == BMK_compressOnly);
         BMK_benchParams_t cbp, dbp;
         BMK_initCCtxArgs cctxprep;
         BMK_initDCtxArgs dctxprep;
 
-        cbp.benchFn = local_defaultCompress;   /* ZSTD_compress2 */
-        cbp.benchPayload = cctx;
-        cbp.initFn = local_initCCtx;   /* BMK_initCCtx */
-        cbp.initPayload = &cctxprep;
-        cbp.errorFn = ZSTD_isError;
-        cbp.blockCount = nbBlocks;
-        cbp.srcBuffers = srcPtrs;
-        cbp.srcSizes = srcSizes;
-        cbp.dstBuffers = cPtrs;
+        cbp.benchFn       = local_defaultCompress; /* ZSTD_compress2 */
+        cbp.benchPayload  = cctx;
+        cbp.initFn        = local_initCCtx; /* BMK_initCCtx */
+        cbp.initPayload   = &cctxprep;
+        cbp.errorFn       = ZSTD_isError;
+        cbp.blockCount    = nbBlocks;
+        cbp.srcBuffers    = srcPtrs;
+        cbp.srcSizes      = srcSizes;
+        cbp.dstBuffers    = cPtrs;
         cbp.dstCapacities = cCapacities;
-        cbp.blockResults = cSizes;
+        cbp.blockResults  = cSizes;
 
-        cctxprep.cctx = cctx;
-        cctxprep.dictBuffer = dictBuffer;
+        cctxprep.cctx           = cctx;
+        cctxprep.dictBuffer     = dictBuffer;
         cctxprep.dictBufferSize = dictBufferSize;
-        cctxprep.cLevel = cLevel;
-        cctxprep.comprParams = comprParams;
-        cctxprep.adv = adv;
-
-        dbp.benchFn = local_defaultDecompress;
-        dbp.benchPayload = dctx;
-        dbp.initFn = local_initDCtx;
-        dbp.initPayload = &dctxprep;
-        dbp.errorFn = ZSTD_isError;
-        dbp.blockCount = nbBlocks;
-        dbp.srcBuffers = (const void* const *) cPtrs;
-        dbp.srcSizes = cSizes;
-        dbp.dstBuffers = resPtrs;
+        cctxprep.cLevel         = cLevel;
+        cctxprep.comprParams    = comprParams;
+        cctxprep.adv            = adv;
+
+        dbp.benchFn       = local_defaultDecompress;
+        dbp.benchPayload  = dctx;
+        dbp.initFn        = local_initDCtx;
+        dbp.initPayload   = &dctxprep;
+        dbp.errorFn       = ZSTD_isError;
+        dbp.blockCount    = nbBlocks;
+        dbp.srcBuffers    = (const void* const*)cPtrs;
+        dbp.srcSizes      = cSizes;
+        dbp.dstBuffers    = resPtrs;
         dbp.dstCapacities = resSizes;
-        dbp.blockResults = NULL;
+        dbp.blockResults  = NULL;
 
-        dctxprep.dctx = dctx;
-        dctxprep.dictBuffer = dictBuffer;
+        dctxprep.dctx           = dctx;
+        dctxprep.dictBuffer     = dictBuffer;
         dctxprep.dictBufferSize = dictBufferSize;
 
-        OUTPUTLEVEL(2, "\r%70s\r", "");   /* blank line */
+        OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */
         assert(srcSize < UINT_MAX);
-        OUTPUTLEVEL(2, "%2s-%-17.17s :%10u -> \r", marks[markNb], displayName, (unsigned)srcSize);
+        OUTPUTLEVEL(
+                2,
+                "%2s-%-17.17s :%10u -> \r",
+                marks[markNb],
+                displayName,
+                (unsigned)srcSize);
 
         while (!(compressionCompleted && decompressionCompleted)) {
             if (!compressionCompleted) {
-                BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp);
+                BMK_runOutcome_t const cOutcome =
+                        BMK_benchTimedFn(timeStateCompress, cbp);
 
                 if (!BMK_isSuccessful_runOutcome(cOutcome)) {
                     RETURN_ERROR(30, BMK_benchOutcome_t, "compression error");
                 }
 
-                {   BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
-                    cSize = cResult.sumOfReturn;
+                {
+                    BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
+                    cSize                       = cResult.sumOfReturn;
                     ratio = (double)srcSize / (double)cSize;
-                    {   BMK_benchResult_t newResult;
-                        newResult.cSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
+                    {
+                        BMK_benchResult_t newResult;
+                        newResult.cSpeed =
+                                (U64)((double)srcSize * TIMELOOP_NANOSEC
+                                      / cResult.nanoSecPerRun);
                         benchResult.cSize = cSize;
                         if (newResult.cSpeed > benchResult.cSpeed)
                             benchResult.cSpeed = newResult.cSpeed;
-                }   }
+                    }
+                }
 
-                {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
+                {
+                    int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
                     assert(cSize < UINT_MAX);
-                    OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r",
-                            marks[markNb], displayName,
-                            (unsigned)srcSize, (unsigned)cSize,
-                            ratioAccuracy, ratio,
-                            benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT);
+                    OUTPUTLEVEL(
+                            2,
+                            "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r",
+                            marks[markNb],
+                            displayName,
+                            (unsigned)srcSize,
+                            (unsigned)cSize,
+                            ratioAccuracy,
+                            ratio,
+                            benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
+                            (double)benchResult.cSpeed / MB_UNIT);
                 }
-                compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
+                compressionCompleted =
+                        BMK_isCompleted_TimedFn(timeStateCompress);
             }
 
-            if(!decompressionCompleted) {
-                BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
+            if (!decompressionCompleted) {
+                BMK_runOutcome_t const dOutcome =
+                        BMK_benchTimedFn(timeStateDecompress, dbp);
 
-                if(!BMK_isSuccessful_runOutcome(dOutcome)) {
+                if (!BMK_isSuccessful_runOutcome(dOutcome)) {
                     RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error");
                 }
 
-                {   BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
-                    U64 const newDSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
+                {
+                    BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
+                    U64 const newDSpeed =
+                            (U64)((double)srcSize * TIMELOOP_NANOSEC
+                                  / dResult.nanoSecPerRun);
                     if (newDSpeed > benchResult.dSpeed)
                         benchResult.dSpeed = newDSpeed;
                 }
 
-                {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                    OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r",
-                            marks[markNb], displayName,
-                            (unsigned)srcSize, (unsigned)cSize,
-                            ratioAccuracy, ratio,
-                            benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT,
+                {
+                    int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
+                    OUTPUTLEVEL(
+                            2,
+                            "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r",
+                            marks[markNb],
+                            displayName,
+                            (unsigned)srcSize,
+                            (unsigned)cSize,
+                            ratioAccuracy,
+                            ratio,
+                            benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
+                            (double)benchResult.cSpeed / MB_UNIT,
                             (double)benchResult.dSpeed / MB_UNIT);
                 }
-                decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
+                decompressionCompleted =
+                        BMK_isCompleted_TimedFn(timeStateDecompress);
             }
-            markNb = (markNb+1) % NB_MARKS;
-        }   /* while (!(compressionCompleted && decompressionCompleted)) */
+            markNb = (markNb + 1) % NB_MARKS;
+        } /* while (!(compressionCompleted && decompressionCompleted)) */
 
         /* CRC Checking */
-        {   const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
-            U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
-            if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) {
+        {
+            const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
+            U64 const crcCheck       = XXH64(resultBuffer, srcSize, 0);
+            if ((adv->mode == BMK_both) && (crcOrig != crcCheck)) {
                 size_t u;
                 DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n",
-                        displayName, (unsigned)crcOrig, (unsigned)crcCheck);
-                for (u=0; u u) break;
+                            if (bacc + srcSizes[segNb] > u)
+                                break;
                             bacc += srcSizes[segNb];
                         }
                         pos = (U32)(u - bacc);
                         bNb = pos / (128 KB);
-                        DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
-                        {   size_t const lowest = (u>5) ? 5 : u;
+                        DISPLAY("(sample %u, block %u, pos %u) \n",
+                                segNb,
+                                bNb,
+                                pos);
+                        {
+                            size_t const lowest = (u > 5) ? 5 : u;
                             size_t n;
                             DISPLAY("origin: ");
-                            for (n=lowest; n>0; n--)
-                                DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u-n]);
+                            for (n = lowest; n > 0; n--)
+                                DISPLAY("%02X ",
+                                        ((const BYTE*)srcBuffer)[u - n]);
                             DISPLAY(" :%02X:  ", ((const BYTE*)srcBuffer)[u]);
-                            for (n=1; n<3; n++)
-                                DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
+                            for (n = 1; n < 3; n++)
+                                DISPLAY("%02X ",
+                                        ((const BYTE*)srcBuffer)[u + n]);
                             DISPLAY(" \n");
                             DISPLAY("decode: ");
-                            for (n=lowest; n>0; n--)
-                                DISPLAY("%02X ", resultBuffer[u-n]);
+                            for (n = lowest; n > 0; n--)
+                                DISPLAY("%02X ", resultBuffer[u - n]);
                             DISPLAY(" :%02X:  ", resultBuffer[u]);
-                            for (n=1; n<3; n++)
-                                DISPLAY("%02X ", resultBuffer[u+n]);
+                            for (n = 1; n < 3; n++)
+                                DISPLAY("%02X ", resultBuffer[u + n]);
                             DISPLAY(" \n");
                         }
                         break;
                     }
-                    if (u==srcSize-1) {  /* should never happen */
+                    if (u == srcSize - 1) { /* should never happen */
                         DISPLAY("no difference detected\n");
                     }
-                }   /* for (u=0; umode == BMK_both) && (crcOrig!=crcCheck)) */
-        }   /* CRC Checking */
+                } /* for (u=0; umode == BMK_both) && (crcOrig!=crcCheck)) */
+        }         /* CRC Checking */
 
-        if (displayLevel == 1) {   /* hidden display mode -q, used by python speed benchmark */
+        if (displayLevel
+            == 1) { /* hidden display mode -q, used by python speed benchmark */
             double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
             double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
             if (adv->additionalParam) {
-                OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
+                OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n",
+                       cLevel,
+                       (int)cSize,
+                       ratio,
+                       cSpeed,
+                       dSpeed,
+                       displayName,
+                       adv->additionalParam);
             } else {
-                OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
+                OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n",
+                       cLevel,
+                       (int)cSize,
+                       ratio,
+                       cSpeed,
+                       dSpeed,
+                       displayName);
             }
         }
 
         OUTPUTLEVEL(2, "%2i#\n", cLevel);
-    }   /* Bench */
+    } /* Bench */
 
-    benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
+    benchResult.cMem =
+            (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
     return BMK_benchOutcome_setValidResult(benchResult);
 }
 
-BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
-                        void* dstBuffer, size_t dstCapacity,
-                        const size_t* fileSizes, unsigned nbFiles,
-                        int cLevel, const ZSTD_compressionParameters* comprParams,
-                        const void* dictBuffer, size_t dictBufferSize,
-                        int displayLevel, const char* displayName, const BMK_advancedParams_t* adv)
+BMK_benchOutcome_t BMK_benchMemAdvanced(
+        const void* srcBuffer,
+        size_t srcSize,
+        void* dstBuffer,
+        size_t dstCapacity,
+        const size_t* fileSizes,
+        unsigned nbFiles,
+        int cLevel,
+        const ZSTD_compressionParameters* comprParams,
+        const void* dictBuffer,
+        size_t dictBufferSize,
+        int displayLevel,
+        const char* displayName,
+        const BMK_advancedParams_t* adv)
 
 {
-    int const dstParamsError = !dstBuffer ^ !dstCapacity;  /* must be both NULL or none */
+    int const dstParamsError =
+            !dstBuffer ^ !dstCapacity; /* must be both NULL or none */
 
-    size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
-    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
+    size_t const blockSize =
+            ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
+                     ? adv->blockSize
+                     : srcSize)
+            + (!srcSize) /* avoid div by 0 */;
+    U32 const maxNbBlocks =
+            (U32)((srcSize + (blockSize - 1)) / blockSize) + nbFiles;
 
     /* these are the blockTable parameters, just split up */
-    const void ** const srcPtrs = (const void**)malloc(maxNbBlocks * sizeof(void*));
+    const void** const srcPtrs =
+            (const void**)malloc(maxNbBlocks * sizeof(void*));
     size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
 
-
-    void ** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
-    size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
+    void** const cPtrs        = (void**)malloc(maxNbBlocks * sizeof(void*));
+    size_t* const cSizes      = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
     size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
 
-    void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
+    void** const resPtrs   = (void**)malloc(maxNbBlocks * sizeof(void*));
     size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
 
-    BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
-    BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
+    BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(
+            adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
+    BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(
+            adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
 
     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 
-    const size_t maxCompressedSize = dstCapacity ? dstCapacity : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
+    const size_t maxCompressedSize = dstCapacity
+            ? dstCapacity
+            : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
 
-    void* const internalDstBuffer = dstBuffer ? NULL : malloc(maxCompressedSize);
+    void* const internalDstBuffer =
+            dstBuffer ? NULL : malloc(maxCompressedSize);
     void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
 
-    BMK_benchOutcome_t outcome = BMK_benchOutcome_error();  /* error by default */
+    BMK_benchOutcome_t outcome =
+            BMK_benchOutcome_error(); /* error by default */
 
     void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
 
-    int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs ||
-        !cSizes || !cCapacities || !resPtrs || !resSizes ||
-        !timeStateCompress || !timeStateDecompress ||
-        !cctx || !dctx ||
-        !compressedBuffer || !resultBuffer;
-
+    int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes
+            || !cCapacities || !resPtrs || !resSizes || !timeStateCompress
+            || !timeStateDecompress || !cctx || !dctx || !compressedBuffer
+            || !resultBuffer;
 
     if (!allocationincomplete && !dstParamsError) {
-        outcome = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes,
-                                            cPtrs, cCapacities, cSizes,
-                                            resPtrs, resSizes,
-                                            &resultBuffer,
-                                            compressedBuffer, maxCompressedSize,
-                                            timeStateCompress, timeStateDecompress,
-                                            srcBuffer, srcSize,
-                                            fileSizes, nbFiles,
-                                            cLevel, comprParams,
-                                            dictBuffer, dictBufferSize,
-                                            cctx, dctx,
-                                            displayLevel, displayName, adv);
+        outcome = BMK_benchMemAdvancedNoAlloc(
+                srcPtrs,
+                srcSizes,
+                cPtrs,
+                cCapacities,
+                cSizes,
+                resPtrs,
+                resSizes,
+                &resultBuffer,
+                compressedBuffer,
+                maxCompressedSize,
+                timeStateCompress,
+                timeStateDecompress,
+                srcBuffer,
+                srcSize,
+                fileSizes,
+                nbFiles,
+                cLevel,
+                comprParams,
+                dictBuffer,
+                dictBufferSize,
+                cctx,
+                dctx,
+                displayLevel,
+                displayName,
+                adv);
     }
 
     /* clean up */
@@ -644,67 +823,104 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
     free(resPtrs);
     free(resSizes);
 
-    if(allocationincomplete) {
-        RETURN_ERROR(31, BMK_benchOutcome_t, "allocation error : not enough memory");
+    if (allocationincomplete) {
+        RETURN_ERROR(
+                31, BMK_benchOutcome_t, "allocation error : not enough memory");
     }
 
-    if(dstParamsError) {
+    if (dstParamsError) {
         RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
     }
     return outcome;
 }
 
-BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
-                        const size_t* fileSizes, unsigned nbFiles,
-                        int cLevel, const ZSTD_compressionParameters* comprParams,
-                        const void* dictBuffer, size_t dictBufferSize,
-                        int displayLevel, const char* displayName) {
-
+BMK_benchOutcome_t BMK_benchMem(
+        const void* srcBuffer,
+        size_t srcSize,
+        const size_t* fileSizes,
+        unsigned nbFiles,
+        int cLevel,
+        const ZSTD_compressionParameters* comprParams,
+        const void* dictBuffer,
+        size_t dictBufferSize,
+        int displayLevel,
+        const char* displayName)
+{
     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
-    return BMK_benchMemAdvanced(srcBuffer, srcSize,
-                                NULL, 0,
-                                fileSizes, nbFiles,
-                                cLevel, comprParams,
-                                dictBuffer, dictBufferSize,
-                                displayLevel, displayName, &adv);
+    return BMK_benchMemAdvanced(
+            srcBuffer,
+            srcSize,
+            NULL,
+            0,
+            fileSizes,
+            nbFiles,
+            cLevel,
+            comprParams,
+            dictBuffer,
+            dictBufferSize,
+            displayLevel,
+            displayName,
+            &adv);
 }
 
-static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
-                            const size_t* fileSizes, unsigned nbFiles,
-                            int cLevel, const ZSTD_compressionParameters* comprParams,
-                            const void* dictBuffer, size_t dictBufferSize,
-                            int displayLevel, const char* displayName,
-                            BMK_advancedParams_t const * const adv)
+static BMK_benchOutcome_t BMK_benchCLevel(
+        const void* srcBuffer,
+        size_t benchedSize,
+        const size_t* fileSizes,
+        unsigned nbFiles,
+        int cLevel,
+        const ZSTD_compressionParameters* comprParams,
+        const void* dictBuffer,
+        size_t dictBufferSize,
+        int displayLevel,
+        const char* displayName,
+        BMK_advancedParams_t const* const adv)
 {
     const char* pch = strrchr(displayName, '\\'); /* Windows */
-    if (!pch) pch = strrchr(displayName, '/');    /* Linux */
-    if (pch) displayName = pch+1;
+    if (!pch)
+        pch = strrchr(displayName, '/'); /* Linux */
+    if (pch)
+        displayName = pch + 1;
 
     if (adv->realTime) {
         DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
         SET_REALTIME_PRIORITY;
     }
 
-    if (displayLevel == 1 && !adv->additionalParam)   /* --quiet mode */
+    if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
         OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
-                ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING,
-                (unsigned)benchedSize, adv->nbSeconds, (unsigned)(adv->blockSize>>10));
-
-    return BMK_benchMemAdvanced(srcBuffer, benchedSize,
-                                NULL, 0,
-                                fileSizes, nbFiles,
-                                cLevel, comprParams,
-                                dictBuffer, dictBufferSize,
-                                displayLevel, displayName, adv);
+               ZSTD_VERSION_STRING,
+               ZSTD_GIT_COMMIT_STRING,
+               (unsigned)benchedSize,
+               adv->nbSeconds,
+               (unsigned)(adv->blockSize >> 10));
+
+    return BMK_benchMemAdvanced(
+            srcBuffer,
+            benchedSize,
+            NULL,
+            0,
+            fileSizes,
+            nbFiles,
+            cLevel,
+            comprParams,
+            dictBuffer,
+            dictBufferSize,
+            displayLevel,
+            displayName,
+            adv);
 }
 
-int BMK_syntheticTest(int cLevel, double compressibility,
-                      const ZSTD_compressionParameters* compressionParams,
-                      int displayLevel, const BMK_advancedParams_t* adv)
+int BMK_syntheticTest(
+        int cLevel,
+        double compressibility,
+        const ZSTD_compressionParameters* compressionParams,
+        int displayLevel,
+        const BMK_advancedParams_t* adv)
 {
-    char nameBuff[20] = {0};
-    const char* name = nameBuff;
-    size_t const benchedSize = 10000000;
+    char nameBuff[20]        = { 0 };
+    const char* name         = nameBuff;
+    size_t const benchedSize = adv->blockSize ? adv->blockSize : 10000000;
     void* srcBuffer;
     BMK_benchOutcome_t res;
 
@@ -726,15 +942,26 @@ int BMK_syntheticTest(int cLevel, double compressibility,
         name = "Lorem ipsum";
     } else {
         RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
-        snprintf (nameBuff, sizeof(nameBuff), "Synthetic %2u%%", (unsigned)(compressibility*100));
+        snprintf(
+                nameBuff,
+                sizeof(nameBuff),
+                "Synthetic %2u%%",
+                (unsigned)(compressibility * 100));
     }
 
     /* Bench */
-    res = BMK_benchCLevel(srcBuffer, benchedSize,
-                    &benchedSize /* ? */, 1 /* ? */,
-                    cLevel, compressionParams,
-                    NULL, 0,  /* dictionary */
-                    displayLevel, name, adv);
+    res = BMK_benchCLevel(
+            srcBuffer,
+            benchedSize,
+            &benchedSize /* ? */,
+            1 /* ? */,
+            cLevel,
+            compressionParams,
+            NULL,
+            0, /* dictionary */
+            displayLevel,
+            name,
+            adv);
 
     /* clean up */
     free(srcBuffer);
@@ -742,16 +969,15 @@ int BMK_syntheticTest(int cLevel, double compressibility,
     return !BMK_isSuccessful_benchOutcome(res);
 }
 
-
-
 static size_t BMK_findMaxMem(U64 requiredMem)
 {
     size_t const step = 64 MB;
-    BYTE* testmem = NULL;
+    BYTE* testmem     = NULL;
 
     requiredMem = (((requiredMem >> 26) + 1) << 26);
     requiredMem += step;
-    if (requiredMem > maxMemory) requiredMem = maxMemory;
+    if (requiredMem > maxMemory)
+        requiredMem = maxMemory;
 
     do {
         testmem = (BYTE*)malloc((size_t)requiredMem);
@@ -765,53 +991,75 @@ static size_t BMK_findMaxMem(U64 requiredMem)
 /*! BMK_loadFiles() :
  *  Loads `buffer` with content of files listed within `fileNamesTable`.
  *  At most, fills `buffer` entirely. */
-static int BMK_loadFiles(void* buffer, size_t bufferSize,
-                         size_t* fileSizes,
-                         const char* const * fileNamesTable, unsigned nbFiles,
-                         int displayLevel)
+static int BMK_loadFiles(
+        void* buffer,
+        size_t bufferSize,
+        size_t* fileSizes,
+        const char* const* fileNamesTable,
+        unsigned nbFiles,
+        int displayLevel)
 {
     size_t pos = 0, totalSize = 0;
     unsigned n;
-    for (n=0; n bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
-            {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
-                if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]);
+            if (fileSize > bufferSize - pos)
+                fileSize = bufferSize - pos,
+                nbFiles  = n; /* buffer too small - stop after this file */
+            {
+                size_t const readSize =
+                        fread(((char*)buffer) + pos, 1, (size_t)fileSize, f);
+                if (readSize != (size_t)fileSize)
+                    RETURN_ERROR_INT(
+                            11, "could not read %s", fileNamesTable[n]);
                 pos += readSize;
             }
             fileSizes[n] = (size_t)fileSize;
             totalSize += (size_t)fileSize;
             fclose(f);
-    }   }
+        }
+    }
 
-    if (totalSize == 0) RETURN_ERROR_INT(12, "no data to bench");
+    if (totalSize == 0)
+        RETURN_ERROR_INT(12, "no data to bench");
     return 0;
 }
 
 int BMK_benchFilesAdvanced(
-                        const char* const * fileNamesTable, unsigned nbFiles,
-                        const char* dictFileName, int cLevel,
-                        const ZSTD_compressionParameters* compressionParams,
-                        int displayLevel, const BMK_advancedParams_t* adv)
+        const char* const* fileNamesTable,
+        unsigned nbFiles,
+        const char* dictFileName,
+        int cLevel,
+        const ZSTD_compressionParameters* compressionParams,
+        int displayLevel,
+        const BMK_advancedParams_t* adv)
 {
     void* srcBuffer = NULL;
     size_t benchedSize;
-    void* dictBuffer = NULL;
+    void* dictBuffer      = NULL;
     size_t dictBufferSize = 0;
-    size_t* fileSizes = NULL;
+    size_t* fileSizes     = NULL;
     BMK_benchOutcome_t res;
     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
 
@@ -840,7 +1088,11 @@ int BMK_benchFilesAdvanced(
     if (dictFileName != NULL) {
         U64 const dictFileSize = UTIL_getFileSize(dictFileName);
         if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
-            DISPLAYLEVEL(1, "error loading %s : %s \n", dictFileName, strerror(errno));
+            DISPLAYLEVEL(
+                    1,
+                    "error loading %s : %s \n",
+                    dictFileName,
+                    strerror(errno));
             free(fileSizes);
             DISPLAYLEVEL(1, "benchmark aborted");
             return 17;
@@ -851,28 +1103,38 @@ int BMK_benchFilesAdvanced(
             return 18;
         }
         dictBufferSize = (size_t)dictFileSize;
-        dictBuffer = malloc(dictBufferSize);
-        if (dictBuffer==NULL) {
+        dictBuffer     = malloc(dictBufferSize);
+        if (dictBuffer == NULL) {
             free(fileSizes);
-            DISPLAYLEVEL(1, "not enough memory for dictionary (%u bytes)",
-                            (unsigned)dictBufferSize);
+            DISPLAYLEVEL(
+                    1,
+                    "not enough memory for dictionary (%u bytes)",
+                    (unsigned)dictBufferSize);
             return 19;
         }
 
-        {   int const errorCode = BMK_loadFiles(dictBuffer, dictBufferSize,
-                                                fileSizes, &dictFileName /*?*/,
-                                                1 /*?*/, displayLevel);
+        {
+            int const errorCode = BMK_loadFiles(
+                    dictBuffer,
+                    dictBufferSize,
+                    fileSizes,
+                    &dictFileName /*?*/,
+                    1 /*?*/,
+                    displayLevel);
             if (errorCode) {
                 res = BMK_benchOutcome_error();
                 goto _cleanUp;
-        }   }
+            }
+        }
     }
 
     /* Memory allocation & restrictions */
     benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
-    if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
+    if ((U64)benchedSize > totalSizeToLoad)
+        benchedSize = (size_t)totalSizeToLoad;
     if (benchedSize < totalSizeToLoad)
-        DISPLAY("Not enough memory; testing %u MB only...\n", (unsigned)(benchedSize >> 20));
+        DISPLAY("Not enough memory; testing %u MB only...\n",
+                (unsigned)(benchedSize >> 20));
 
     srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
     if (!srcBuffer) {
@@ -883,25 +1145,41 @@ int BMK_benchFilesAdvanced(
     }
 
     /* Load input buffer */
-    {   int const errorCode = BMK_loadFiles(srcBuffer, benchedSize,
-                                        fileSizes, fileNamesTable, nbFiles,
-                                        displayLevel);
+    {
+        int const errorCode = BMK_loadFiles(
+                srcBuffer,
+                benchedSize,
+                fileSizes,
+                fileNamesTable,
+                nbFiles,
+                displayLevel);
         if (errorCode) {
             res = BMK_benchOutcome_error();
             goto _cleanUp;
-    }   }
+        }
+    }
 
     /* Bench */
-    {   char mfName[20] = {0};
-        snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
-        {   const char* const displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
-            res = BMK_benchCLevel(srcBuffer, benchedSize,
-                                fileSizes, nbFiles,
-                                cLevel, compressionParams,
-                                dictBuffer, dictBufferSize,
-                                displayLevel, displayName,
-                                adv);
-    }   }
+    {
+        char mfName[20] = { 0 };
+        snprintf(mfName, sizeof(mfName), " %u files", nbFiles);
+        {
+            const char* const displayName =
+                    (nbFiles > 1) ? mfName : fileNamesTable[0];
+            res = BMK_benchCLevel(
+                    srcBuffer,
+                    benchedSize,
+                    fileSizes,
+                    nbFiles,
+                    cLevel,
+                    compressionParams,
+                    dictBuffer,
+                    dictBufferSize,
+                    displayLevel,
+                    displayName,
+                    adv);
+        }
+    }
 
 _cleanUp:
     free(srcBuffer);
@@ -910,12 +1188,21 @@ int BMK_benchFilesAdvanced(
     return !BMK_isSuccessful_benchOutcome(res);
 }
 
-
-int BMK_benchFiles(const char* const * fileNamesTable, unsigned nbFiles,
-                    const char* dictFileName,
-                    int cLevel, const ZSTD_compressionParameters* compressionParams,
-                    int displayLevel)
+int BMK_benchFiles(
+        const char* const* fileNamesTable,
+        unsigned nbFiles,
+        const char* dictFileName,
+        int cLevel,
+        const ZSTD_compressionParameters* compressionParams,
+        int displayLevel)
 {
     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
-    return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv);
+    return BMK_benchFilesAdvanced(
+            fileNamesTable,
+            nbFiles,
+            dictFileName,
+            cLevel,
+            compressionParams,
+            displayLevel,
+            &adv);
 }
diff --git a/programs/lorem.c b/programs/lorem.c
index 56e229058f9..79030c92f32 100644
--- a/programs/lorem.c
+++ b/programs/lorem.c
@@ -95,8 +95,8 @@ static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
 
 /* simple 1-dimension distribution, based on word's length, favors small words
  */
-static const int kWeights[]      = { 0, 8, 6, 4, 3, 2 };
-static const unsigned kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
+static const int kWeights[]    = { 0, 8, 6, 4, 3, 2 };
+static const size_t kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
 
 #define DISTRIB_SIZE_MAX 650
 static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
@@ -106,12 +106,12 @@ static void countFreqs(
         const char* words[],
         size_t nbWords,
         const int* weights,
-        unsigned long nbWeights)
+        size_t nbWeights)
 {
     unsigned total = 0;
     size_t w;
     for (w = 0; w < nbWords; w++) {
-        unsigned long len = strlen(words[w]);
+        size_t len = strlen(words[w]);
         int lmax;
         if (len >= nbWeights)
             len = nbWeights - 1;
@@ -126,12 +126,12 @@ static void init_word_distrib(
         const char* words[],
         size_t nbWords,
         const int* weights,
-        unsigned long nbWeights)
+        size_t nbWeights)
 {
     size_t w, d = 0;
     countFreqs(words, nbWords, weights, nbWeights);
     for (w = 0; w < nbWords; w++) {
-        unsigned long len = strlen(words[w]);
+        size_t len = strlen(words[w]);
         int l, lmax;
         if (len >= nbWeights)
             len = nbWeights - 1;

From 1e240af30a1d11ae45745c6c3e96307bad3771fd Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 18:06:56 -0800
Subject: [PATCH 193/283] fix datagen size control

---
 tests/datagencli.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/datagencli.c b/tests/datagencli.c
index b3020995385..56616bef4b6 100644
--- a/tests/datagencli.c
+++ b/tests/datagencli.c
@@ -143,7 +143,6 @@ int main(int argc, const char** argv)
         LOREM_genOut(size, seed);
     }
 
-    RDG_genStdout(size, (double)probaU32 / 100, litProba, seed);
     DISPLAYLEVEL(3, "\n");
 
     return 0;

From c2d357033838c01c827fc10f0b2b850df339776a Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 18:17:28 -0800
Subject: [PATCH 194/283] fix meson datagen build

---
 build/meson/tests/meson.build | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/build/meson/tests/meson.build b/build/meson/tests/meson.build
index 03480d20542..9847ab03738 100644
--- a/build/meson/tests/meson.build
+++ b/build/meson/tests/meson.build
@@ -44,7 +44,8 @@ testcommon_dep = declare_dependency(link_with: testcommon,
   dependencies: libzstd_deps,
   include_directories: libzstd_includes)
 
-datagen_sources = [join_paths(zstd_rootdir, 'tests/datagencli.c')]
+datagen_sources = [join_paths(zstd_rootdir, 'tests/datagencli.c'),
+  join_paths(zstd_rootdir, 'tests/loremOut.c')]
 datagen = executable('datagen',
   datagen_sources,
   c_args: [ '-DNDEBUG' ],

From 588dfbcc97657f1d70e711f3e22d8f992e14ae28 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 19:21:01 -0800
Subject: [PATCH 195/283] fix c89 compatibility by removing snprintf()

Note that snprintf() has been in the code for a long while,
so it is unclear why it only starts failing CI tests now.
---
 programs/benchzstd.c | 57 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 56af23d33e7..d310af9248b 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -137,6 +137,57 @@ static const size_t maxMemory = (sizeof(size_t) == 4)
         return r;                                      \
     }
 
+/* replacement for snprintf(), which is not supported by C89
+ * sprintf() would be the supported one, but it's labelled unsafe,
+ * so some modern static analyzer will flag it as such, making it unusable.
+ * formatString_u() replaces snprintf() for the specific case where there are only %u arguments */
+static int formatString_u(char *buffer, size_t buffer_size, const char *formatString, unsigned int value)
+{
+    size_t written = 0;
+    int i;
+    assert(value <= 100);
+
+    for (i = 0; formatString[i] != '\0' && written < buffer_size - 1; ++i) {
+        if (formatString[i] != '%') {
+            buffer[written++] = formatString[i];
+            continue;
+        }
+
+        if (formatString[++i] == 'u') {
+            /* Handle single digit */
+            if (value < 10) {
+                buffer[written++] = '0' + value;
+            } else if (value < 100) {
+                /* Handle two digits */
+                if (written >= buffer_size - 2) {
+                    return -1; /* buffer overflow */
+                }
+                buffer[written++] = '0' + value / 10;
+                buffer[written++] = '0' + value % 10;
+            } else { /* 100 */
+                if (written >= buffer_size - 3) {
+                    return -1; /* buffer overflow */
+                }
+                buffer[written++] = '1';
+                buffer[written++] = '0';
+                buffer[written++] = '0';
+            }
+        } else if (formatString[i] == '%') { /* Check for escaped percent sign */
+            buffer[written++] = '%';
+        } else {
+            return -1; /* unsupported format */
+        }
+    }
+
+    if (written < buffer_size) {
+        buffer[written] = '\0';
+    } else {
+        buffer[0] = '\0'; /* Handle truncation */
+    }
+
+    return written;
+}
+
 /* *************************************
  *  Benchmark Parameters
  ***************************************/
@@ -942,10 +993,10 @@ int BMK_syntheticTest(
         name = "Lorem ipsum";
     } else {
         RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
-        snprintf(
+        formatString_u(
                 nameBuff,
                 sizeof(nameBuff),
-                "Synthetic %2u%%",
+                "Synthetic %u%%",
                 (unsigned)(compressibility * 100));
     }
 
@@ -1162,7 +1213,7 @@ int BMK_benchFilesAdvanced(
     /* Bench */
     {
         char mfName[20] = { 0 };
-        snprintf(mfName, sizeof(mfName), " %u files", nbFiles);
+        formatString_u(mfName, sizeof(mfName), " %u files", nbFiles);
         {
             const char* const displayName =
                     (nbFiles > 1) ? mfName : fileNamesTable[0];

From b34517a4402603e8210c24ceb7b976a360ef978b Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 20:20:40 -0800
Subject: [PATCH 196/283] fix cmake build

---
 build/cmake/tests/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt
index 3226374a507..3ead070102f 100644
--- a/build/cmake/tests/CMakeLists.txt
+++ b/build/cmake/tests/CMakeLists.txt
@@ -50,7 +50,7 @@ set(PROGRAMS_DIR ${ZSTD_SOURCE_DIR}/programs)
 set(TESTS_DIR ${ZSTD_SOURCE_DIR}/tests)
 include_directories(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compress ${LIBRARY_DIR}/dictBuilder)
 
-add_executable(datagen ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/datagencli.c)
+add_executable(datagen ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/lorem.c ${TESTS_DIR}/loremOut.c ${TESTS_DIR}/datagencli.c)
 target_link_libraries(datagen libzstd_static)
 
 #

From e62e15df190ebb41b0b9f1453b2a4e9bd6e05f51 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 22:43:22 -0800
Subject: [PATCH 197/283] fix clangbuild

notably -Wconversion and -Wdocumentation
---
 lib/common/xxhash.h  | 43 -------------------------------------------
 programs/benchzstd.c | 10 +++++-----
 2 files changed, 5 insertions(+), 48 deletions(-)

diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index 424ed19b8a0..e5ed3dc0443 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -630,7 +630,6 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, X
  * @brief The opaque state struct for the XXH32 streaming API.
  *
  * @see XXH32_state_s for details.
- * @see @ref streaming_example "Streaming Example"
  */
 typedef struct XXH32_state_s XXH32_state_t;
 
@@ -641,8 +640,6 @@ typedef struct XXH32_state_s XXH32_state_t;
  * @return `NULL` on failure.
  *
  * @note Must be freed with XXH32_freeState().
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
 /*!
@@ -654,8 +651,6 @@ XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
  *
  * @note @p statePtr must be allocated with XXH32_createState().
  *
- * @see @ref streaming_example "Streaming Example"
- *
  */
 XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
 /*!
@@ -681,8 +676,6 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_
  * @return @ref XXH_ERROR on failure.
  *
  * @note This function resets and seeds a state. Call it before @ref XXH32_update().
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
 
@@ -704,8 +697,6 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t
  * @return @ref XXH_ERROR on failure.
  *
  * @note Call this to incrementally consume blocks of data.
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
 
@@ -722,8 +713,6 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void*
  * @note
  *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
  *   digest, and update again.
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
 #endif /* !XXH_NO_STREAM */
@@ -900,7 +889,6 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size
  * @brief The opaque state struct for the XXH64 streaming API.
  *
  * @see XXH64_state_s for details.
- * @see @ref streaming_example "Streaming Example"
  */
 typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
 
@@ -911,8 +899,6 @@ typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
  * @return `NULL` on failure.
  *
  * @note Must be freed with XXH64_freeState().
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
 
@@ -924,8 +910,6 @@ XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
  * @return @ref XXH_OK.
  *
  * @note @p statePtr must be allocated with XXH64_createState().
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
 
@@ -952,8 +936,6 @@ XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const
  * @return @ref XXH_ERROR on failure.
  *
  * @note This function resets and seeds a state. Call it before @ref XXH64_update().
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
 
@@ -975,8 +957,6 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH_NOESCAPE XXH64_state_t* statePtr,
  * @return @ref XXH_ERROR on failure.
  *
  * @note Call this to incrementally consume blocks of data.
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
 
@@ -993,8 +973,6 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr,
  * @note
  *   Calling XXH64_digest() will not affect @p statePtr, so you can update,
  *   digest, and update again.
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
 #endif /* !XXH_NO_STREAM */
@@ -1199,7 +1177,6 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const
  * @brief The opaque state struct for the XXH3 streaming API.
  *
  * @see XXH3_state_s for details.
- * @see @ref streaming_example "Streaming Example"
  */
 typedef struct XXH3_state_s XXH3_state_t;
 XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
@@ -1231,8 +1208,6 @@ XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOE
  *   - Call this function before @ref XXH3_64bits_update().
  *   - Digest will be equivalent to `XXH3_64bits()`.
  *
- * @see @ref streaming_example "Streaming Example"
- *
  */
 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
 
@@ -1253,8 +1228,6 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* stateP
  *   - Call this function before @ref XXH3_64bits_update().
  *   - Digest will be equivalent to `XXH3_64bits_withSeed()`.
  *
- * @see @ref streaming_example "Streaming Example"
- *
  */
 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
 
@@ -1279,8 +1252,6 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_
  * (secret's content should look like a bunch of random bytes).
  * When in doubt about the randomness of a candidate `secret`,
  * consider employing `XXH3_generateSecret()` instead (see below).
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
 
@@ -1302,8 +1273,6 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_stat
  * @return @ref XXH_ERROR on failure.
  *
  * @note Call this to incrementally consume blocks of data.
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
 
@@ -1320,8 +1289,6 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* stat
  * @note
  *   Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
  *   digest, and update again.
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t  XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
 #endif /* !XXH_NO_STREAM */
@@ -1442,8 +1409,6 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE cons
  *   - This function resets `statePtr` and generate a secret with default parameters.
  *   - Call it before @ref XXH3_128bits_update().
  *   - Digest will be equivalent to `XXH3_128bits()`.
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
 
@@ -1463,8 +1428,6 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* state
  *   - This function resets `statePtr` and generate a secret from `seed`.
  *   - Call it before @ref XXH3_128bits_update().
  *   - Digest will be equivalent to `XXH3_128bits_withSeed()`.
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
 /*!
@@ -1486,8 +1449,6 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state
  * (secret's content should look like a bunch of random bytes).
  * When in doubt about the randomness of a candidate `secret`,
  * consider employing `XXH3_generateSecret()` instead (see below).
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
 
@@ -6072,8 +6033,6 @@ static void XXH_alignedFree(void* p)
  * @return `NULL` on failure.
  *
  * @note Must be freed with XXH3_freeState().
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
 {
@@ -6092,8 +6051,6 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
  * @return @ref XXH_OK.
  *
  * @note Must be allocated with XXH3_createState().
- *
- * @see @ref streaming_example "Streaming Example"
  */
 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
 {
diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index d310af9248b..8ab9c0c2dc0 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -141,7 +141,7 @@ static const size_t maxMemory = (sizeof(size_t) == 4)
  * sprintf() would be the supported one, but it's labelled unsafe,
  * so some modern static analyzer will flag it as such, making it unusable.
  * formatString_u() replaces snprintf() for the specific case where there are only %u arguments */
-static int formatString_u(char *buffer, size_t buffer_size, const char *formatString, unsigned int value)
+static int formatString_u(char* buffer, size_t buffer_size, const char* formatString, unsigned int value)
 {
     size_t written = 0;
     int i;
@@ -156,14 +156,14 @@ static int formatString_u(char *buffer, size_t buffer_size, const char *formatSt
         if (formatString[++i] == 'u') {
             /* Handle single digit */
             if (value < 10) {
-                buffer[written++] = '0' + value;
+                buffer[written++] = '0' + (char)value;
             } else if (value < 100) {
                 /* Handle two digits */
                 if (written >= buffer_size - 2) {
                     return -1; /* buffer overflow */
                 }
-                buffer[written++] = '0' + value / 10;
-                buffer[written++] = '0' + value % 10;
+                buffer[written++] = '0' + (char)(value / 10);
+                buffer[written++] = '0' + (char)(value % 10);
             } else { /* 100 */
                 if (written >= buffer_size - 3) {
                     return -1; /* buffer overflow */
@@ -185,7 +185,7 @@ static int formatString_u(char *buffer, size_t buffer_size, const char *formatSt
         buffer[0] = '\0'; /* Handle truncation */
     }
 
-    return written;
+    return (int)written;
 }
 
 /* *************************************

From 9e711c9360d8ebf17132e750b3fe24f79fc63a6d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 22:59:58 -0800
Subject: [PATCH 198/283] fix Visual Studio datagen recipe

---
 build/VS2010/datagen/datagen.vcxproj | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/build/VS2010/datagen/datagen.vcxproj b/build/VS2010/datagen/datagen.vcxproj
index a66358a0d3a..aaba4788b9c 100644
--- a/build/VS2010/datagen/datagen.vcxproj
+++ b/build/VS2010/datagen/datagen.vcxproj
@@ -157,6 +157,8 @@
   
     
     
+    
+    
     
   
   

From 7170f51dd277d4aa4a675ffdd5593af362abe83c Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 20 Feb 2024 23:36:04 -0800
Subject: [PATCH 199/283] fix include order

---
 programs/benchzstd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 8ab9c0c2dc0..32227669f8f 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -18,14 +18,16 @@
 /* *************************************
  *  Includes
  ***************************************/
+/* this must be included first */
+#include "platform.h" /* Large Files support, compiler specifics */
+
+/* then following system includes */
 #include <assert.h> /* assert */
 #include <errno.h>
 #include <stdio.h>    /* fprintf, fopen */
 #include <stdlib.h>   /* malloc, free */
 #include <string.h>   /* memset, strerror */
-#include "platform.h" /* Large Files support */
 #include "util.h"     /* UTIL_getFileSize, UTIL_sleep */
-
 #include "../lib/common/mem.h"
 #include "benchfn.h"
 #include "timefn.h" /* UTIL_time_t */

From 0a68be83e7cb84c8212f666b4b4aa6e0dc8477cc Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Wed, 21 Feb 2024 00:22:04 -0800
Subject: [PATCH 200/283] updated setup-msys2 to v2.22.0

following a warning in recent test reports

```
Node.js 16 actions are deprecated. Please update the following actions to use Node.js 20: msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36. For more information see: https://github.blog/changelog/2023-09-22-github-actions-transitioning-from-node-16-to-node-20/.
```
---
 .github/workflows/dev-long-tests.yml  | 2 +-
 .github/workflows/dev-short-tests.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index 29db7316ea3..1e411a86d04 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -247,7 +247,7 @@ jobs:
         shell: msys2 {0}
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
-    - uses: msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36 # tag=v2
+    - uses: msys2/setup-msys2@cc11e9188b693c2b100158c3322424c4cc1dadea # tag=v2.22.0
       with:
         msystem: MINGW64
         install: make
diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 3123e129bc3..5f49bacaefa 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -448,7 +448,7 @@ jobs:
         shell: msys2 {0}
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
-    - uses: msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36 # tag=v2
+    - uses: msys2/setup-msys2@cc11e9188b693c2b100158c3322424c4cc1dadea # tag=v2.22.0
       with:
         msystem: ${{ matrix.msystem }}
         install: make diffutils

From 68a232c5917ff387031c76acea80f77e8115419f Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 23 Feb 2024 13:13:03 -0800
Subject: [PATCH 201/283] benchmark mode can test targetCBlockSize

---
 programs/benchzstd.c | 3 +++
 programs/benchzstd.h | 1 +
 programs/zstd.1.md   | 4 ++++
 programs/zstdcli.c   | 1 +
 4 files changed, 9 insertions(+)

diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 32227669f8f..29ee595c174 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -200,6 +200,7 @@ BMK_advancedParams_t BMK_initAdvancedParams(void)
         BMK_both,               /* mode */
         BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
         0,                      /* blockSize */
+        0,               /* targetCBlockSize */
         0,                      /* nbWorkers */
         0,                      /* realTime */
         0,                      /* additionalParam */
@@ -275,6 +276,8 @@ static void BMK_initCCtx(
             (int)adv->literalCompressionMode));
     CHECK_Z(ZSTD_CCtx_setParameter(
             ctx, ZSTD_c_strategy, (int)comprParams->strategy));
+    CHECK_Z(ZSTD_CCtx_setParameter(
+            ctx, ZSTD_c_targetCBlockSize, (int)adv->targetCBlockSize));
     CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
 }
 
diff --git a/programs/benchzstd.h b/programs/benchzstd.h
index cdb6101c201..ad3088cd43b 100644
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -100,6 +100,7 @@ typedef struct {
     BMK_mode_t mode;        /* 0: all, 1: compress only 2: decode only */
     unsigned nbSeconds;     /* default timing is in nbSeconds */
     size_t blockSize;       /* Maximum size of each block*/
+    size_t targetCBlockSize;/* Approximative size of compressed blocks */
     int nbWorkers;          /* multithreading */
     unsigned realTime;      /* real time priority */
     int additionalParam;    /* used by python speed benchmark */
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 231341b2a5b..9a4a5df2c6d 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -218,6 +218,10 @@ the last one takes effect.
     expected. This feature allows for controlling the guess when needed.
     Exact guesses result in better compression ratios. Overestimates result in slightly
     degraded compression ratios, while underestimates may result in significant degradation.
+* `--target-compressed-block-size=#`:
+    Attempt to produce compressed blocks of approximately this size.
+    This will split larger blocks in order to approach this target.
+    Notably useful to improve latency when the receiver can make use of early data sooner.
 * `-o FILE`:
     save result into `FILE`.
 * `-f`, `--force`:
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index dd21021b065..85d0e12d7d0 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -1374,6 +1374,7 @@ int main(int argCount, const char* argv[])
             CLEAN_RETURN(1);
         }
         benchParams.blockSize = blockSize;
+        benchParams.targetCBlockSize = targetCBlockSize;
         benchParams.nbWorkers = (int)nbWorkers;
         benchParams.realTime = (unsigned)setRealTimePrio;
         benchParams.nbSeconds = bench_nbSeconds;

From cc4530924b42c5d138f871c33726d374e2778ad3 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 23 Feb 2024 14:03:26 -0800
Subject: [PATCH 202/283] speed optimized version of targetCBlockSize

Note that the size of individual compressed blocks will vary more wildly with this modification.
But it seems good enough for a first test, and it fixes the speed regression issue.
Further refinements can be attempted later.
---
 lib/compress/zstd_compress_superblock.c | 112 ++++++++++++------------
 programs/zstd.1.md                      |   5 +-
 2 files changed, 58 insertions(+), 59 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index dacaf85dbc2..5b22da02666 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -122,7 +122,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
     }
     *entropyWritten = 1;
     DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
 
 static size_t
@@ -187,7 +187,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
     else
         op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
     if (nbSeq==0) {
-        return op - ostart;
+        return (size_t)(op - ostart);
     }
 
     /* seqHead : flags for FSE encoding type */
@@ -209,7 +209,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
     }
 
     {   size_t const bitstreamSize = ZSTD_encodeSequences(
-                                        op, oend - op,
+                                        op, (size_t)(oend - op),
                                         fseTables->matchlengthCTable, mlCode,
                                         fseTables->offcodeCTable, ofCode,
                                         fseTables->litlengthCTable, llCode,
@@ -253,7 +253,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
 #endif
 
     *entropyWritten = 1;
-    return op - ostart;
+    return (size_t)(op - ostart);
 }
 
 /** ZSTD_compressSubBlock() :
@@ -296,11 +296,11 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
         op += cSeqSize;
     }
     /* Write block header */
-    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+    {   size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
         U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
         MEM_writeLE24(ostart, cBlockHeader24);
     }
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
 
 static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
@@ -419,6 +419,16 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
     return 0;
 }
 
+static size_t countLiterals(const seqDef* sp, size_t seqCount)
+{
+    size_t n, total = 0;
+    assert(sp != NULL);
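+    for (n=0; n<seqCount; n++) {
+        total += sp[n].litLength;
+    }
+    return total;
+}
+
 /** ZSTD_compressSubBlock_multi() :
  *  Breaks super-block into multiple sub-blocks and compresses them.
  *  Entropy will be written to the first block.
@@ -438,10 +448,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
 {
     const seqDef* const sstart = seqStorePtr->sequencesStart;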
     const seqDef* const send = seqStorePtr->sequences;
-    const seqDef* sp = sstart;
+    size_t const nbSeqs = (size_t)(send - sstart);
+    size_t nbSeqsPerBlock = nbSeqs;
     const BYTE* const lstart = seqStorePtr->litStart;
     const BYTE* const lend = seqStorePtr->lit;
     const BYTE* lp = lstart;
+    size_t const nbLiterals = (size_t)(lend - lstart);
     BYTE const* ip = (BYTE const*)src;
     BYTE const* const iend = ip + srcSize;
     BYTE* const ostart = (BYTE*)dst;
@@ -451,52 +463,50 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const BYTE* mlCodePtr = seqStorePtr->mlCode;
     const BYTE* ofCodePtr = seqStorePtr->ofCode;
     size_t targetCBlockSize = cctxParams->targetCBlockSize;
-    size_t litSize, seqCount;
     int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
     int writeSeqEntropy = 1;
-    int lastSequence = 0;
+    size_t nbSubBlocks = 1;
 
     DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
                 (unsigned)(lend-lp), (unsigned)(send-sstart));
 
-    litSize = 0;
-    seqCount = 0;
-    do {
-        size_t cBlockSizeEstimate = 0;
-        if (sstart == send) {
-            lastSequence = 1;
-        } else {
-            const seqDef* const sequence = sp + seqCount;
-            lastSequence = sequence == send - 1;
-            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
-            seqCount++;
-        }
-        if (lastSequence) {
-            assert(lp <= lend);
-            assert(litSize <= (size_t)(lend - lp));
-            litSize = (size_t)(lend - lp);
-        }
-        /* I think there is an optimization opportunity here.
-         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
-         * since it recalculates estimate from scratch.
-         * For example, it would recount literal distribution and symbol codes every time.
-         */
-        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
-                                                       &nextCBlock->entropy, entropyMetadata,
-                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
-        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
+    /* let's start by a general estimation for the full block */
+    {   size_t const cBlockSizeEstimate =
+                ZSTD_estimateSubBlockSize(lp, nbLiterals,
+                                        ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
+                                        &nextCBlock->entropy, entropyMetadata,
+                                        workspace, wkspSize,
+                                        writeLitEntropy, writeSeqEntropy);
+        /* quick estimation */
+        nbSubBlocks = (cBlockSizeEstimate + (targetCBlockSize-1)) / targetCBlockSize;
+        assert(nbSubBlocks > 0);
+        nbSeqsPerBlock = nbSeqs / nbSubBlocks;
+        /* Note: this is very approximative. Obviously, some sub-blocks will be larger and others faster.
+         * But the contract of this feature has always been approximative, so for now we'll leverage it for speed.
+         * It can be refined later, for closer-to-target compressed block size, if it ever matters. */
+    }
+
+    /* write sub-blocks */
+    {   size_t n;
+        for (n=0; n < nbSubBlocks; n++) {
+            const seqDef* sp = sstart + n*nbSeqsPerBlock;
+            int lastSubBlock = (n==nbSubBlocks-1);
+            size_t const nbSeqsLastSubBlock = nbSeqs - (nbSubBlocks-1) * nbSeqsPerBlock;
+            size_t seqCount = lastSubBlock ? nbSeqsLastSubBlock : nbSeqsPerBlock;
+            size_t litSize = lastSubBlock ? (size_t)(lend-lp) : countLiterals(sp, seqCount);
             int litEntropyWritten = 0;
             int seqEntropyWritten = 0;
-            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
+            const size_t decompressedSize =
+                    ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSubBlock);
             const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
                                                        sp, seqCount,
                                                        lp, litSize,
                                                        llCodePtr, mlCodePtr, ofCodePtr,
                                                        cctxParams,
-                                                       op, oend-op,
+                                                       op, (size_t)(oend-op),
                                                        bmi2, writeLitEntropy, writeSeqEntropy,
                                                        &litEntropyWritten, &seqEntropyWritten,
-                                                       lastBlock && lastSequence);
+                                                       lastBlock && lastSubBlock);
             FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
             if (cSize > 0 && cSize < decompressedSize) {
                 DEBUGLOG(5, "Committed the sub-block");
@@ -519,7 +529,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                 }
             }
         }
-    } while (!lastSequence);
+    }
+
     if (writeLitEntropy) {
         DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
         ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
@@ -531,25 +542,10 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
         return 0;
     }
-    if (ip < iend) {
-        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
-        FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
-        assert(cSize != 0);
-        op += cSize;
-        /* We have to regenerate the repcodes because we've skipped some sequences */
-        if (sp < send) {
-            seqDef const* seq;
-            repcodes_t rep;
-            ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
-            for (seq = sstart; seq < sp; ++seq) {
-                ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
-            }
-            ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
-        }
-    }
-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
-    return op-ostart;
+    assert(ip == iend); (void)iend;
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed: %u subBlocks, total compressed size = %u",
+                (unsigned)nbSubBlocks, (unsigned)(op-ostart));
+    return (size_t)(op-ostart);
 }
 
 size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 9a4a5df2c6d..078455f3e4e 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -221,7 +221,10 @@ the last one takes effect.
 * `--target-compressed-block-size=#`:
     Attempt to produce compressed blocks of approximately this size.
     This will split larger blocks in order to approach this target.
-    Notably useful to improve latency when the receiver can make use of early data sooner.
+    This feature is notably useful for improved latency, when the receiver can leverage receiving early incomplete data.
+    This parameter defines a loose target: compressed blocks will target this size "on average", but individual blocks can still be larger or smaller.
+    Enabling this feature can decrease compression speed by up to ~10% at level 1.
+    Higher levels will see smaller relative speed regression, becoming invisible at higher settings.
 * `-o FILE`:
     save result into `FILE`.
 * `-f`, `--force`:

From 6b11fc436c3001cb9beb07627e7b434aab97b4b1 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 23 Feb 2024 14:53:56 -0800
Subject: [PATCH 203/283] fix issue with incompressible sections

---
 lib/compress/zstd_compress_superblock.c | 91 ++++++++++++++++---------
 tests/fuzzer.c                          |  4 +-
 2 files changed, 60 insertions(+), 35 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 5b22da02666..2bb2e220839 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -136,7 +136,7 @@ ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
     size_t matchLengthSum = 0;
     size_t litLengthSum = 0;
     (void)(litLengthSum); /* suppress unused variable warning on some environments */
-    while (send-sp > 0) {
+    while (sp < send) {
         ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
         litLengthSum += seqLen.litLength;
         matchLengthSum += seqLen.matchLength;
@@ -462,7 +462,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const BYTE* llCodePtr = seqStorePtr->llCode;
     const BYTE* mlCodePtr = seqStorePtr->mlCode;
     const BYTE* ofCodePtr = seqStorePtr->ofCode;
-    size_t targetCBlockSize = cctxParams->targetCBlockSize;
+    size_t const minTarget = 2 KB; /* enforce minimum size to avoid undesirable side effects */
+    size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
     int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
     int writeSeqEntropy = 1;
     size_t nbSubBlocks = 1;
@@ -470,8 +471,13 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
                 (unsigned)(lend-lp), (unsigned)(send-sstart));
 
-    /* let's start by a general estimation for the full block */
-    {   size_t const cBlockSizeEstimate =
+    if (nbSeqs == 0) {
+        /* special case : no sequence */
+        nbSeqsPerBlock = 0;
+        nbSubBlocks = 1;
+    } else {
+        /* let's start by a general estimation for the full block */
+        size_t const cBlockSizeEstimate =
                 ZSTD_estimateSubBlockSize(lp, nbLiterals,
                                         ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
                                         &nextCBlock->entropy, entropyMetadata,
@@ -480,8 +486,13 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         /* quick estimation */
         nbSubBlocks = (cBlockSizeEstimate + (targetCBlockSize-1)) / targetCBlockSize;
         assert(nbSubBlocks > 0);
-        nbSeqsPerBlock = nbSeqs / nbSubBlocks;
-        /* Note: this is very approximative. Obviously, some sub-blocks will be larger and others faster.
+        if (nbSeqs > nbSubBlocks) {
+            nbSeqsPerBlock = nbSeqs / nbSubBlocks;
+        } else {
+            nbSeqsPerBlock = 1;
+            nbSubBlocks = nbSeqs;
+        }
+        /* Note: this is very approximative. Obviously, some sub-blocks will be larger and others smaller.
          * But the contract of this feature has always been approximative, so for now we'll leverage it for speed.
          * It can be refined later, for closer-to-target compressed block size, if it ever matters. */
     }
@@ -498,36 +509,50 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
             int seqEntropyWritten = 0;
             const size_t decompressedSize =
                     ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSubBlock);
-            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
-                                                       sp, seqCount,
-                                                       lp, litSize,
-                                                       llCodePtr, mlCodePtr, ofCodePtr,
-                                                       cctxParams,
-                                                       op, (size_t)(oend-op),
-                                                       bmi2, writeLitEntropy, writeSeqEntropy,
-                                                       &litEntropyWritten, &seqEntropyWritten,
-                                                       lastBlock && lastSubBlock);
+            size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                               sp, seqCount,
+                                               lp, litSize,
+                                               llCodePtr, mlCodePtr, ofCodePtr,
+                                               cctxParams,
+                                               op, (size_t)(oend-op),
+                                               bmi2, writeLitEntropy, writeSeqEntropy,
+                                               &litEntropyWritten, &seqEntropyWritten,
+                                               lastBlock && lastSubBlock);
             FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
-            if (cSize > 0 && cSize < decompressedSize) {
-                DEBUGLOG(5, "Committed the sub-block");
-                assert(ip + decompressedSize <= iend);
-                ip += decompressedSize;
-                sp += seqCount;
-                lp += litSize;
-                op += cSize;
-                llCodePtr += seqCount;
-                mlCodePtr += seqCount;
-                ofCodePtr += seqCount;
-                litSize = 0;
-                seqCount = 0;
-                /* Entropy only needs to be written once */
-                if (litEntropyWritten) {
-                    writeLitEntropy = 0;
-                }
-                if (seqEntropyWritten) {
-                    writeSeqEntropy = 0;
+
+            if (cSize == 0 || cSize >= decompressedSize) {
+                cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, decompressedSize, lastBlock);
+                DEBUGLOG(5, "send an uncompressed sub-block of %u bytes", (unsigned)(decompressedSize));
+                FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+                assert(cSize != 0);
+                /* We have to regenerate the repcodes because we've skipped some sequences */
+                if (sp < send) {
+                    seqDef const* seq;
+                    repcodes_t rep;
+                    ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
+                    for (seq = sstart; seq < sp; ++seq) {
+                        ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+                    }
+                    ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
                 }
             }
+
+            DEBUGLOG(5, "Committed the sub-block");
+            assert(ip + decompressedSize <= iend);
+            ip += decompressedSize;
+            sp += seqCount;
+            lp += litSize;
+            op += cSize;
+            llCodePtr += seqCount;
+            mlCodePtr += seqCount;
+            ofCodePtr += seqCount;
+            /* Entropy only needs to be written once */
+            if (litEntropyWritten) {
+                writeLitEntropy = 0;
+            }
+            if (seqEntropyWritten) {
+                writeSeqEntropy = 0;
+            }
         }
     }
 
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index d70a669047b..6c1f58df7db 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -952,7 +952,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
         ZSTD_freeCDict(cdict);
         ZSTD_freeCCtx(cctx);
     }
-    
+
     DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2K", testNb++);
     {
         ZSTD_CCtx* cctx = ZSTD_createCCtx();
@@ -1374,7 +1374,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
-    DISPLAYLEVEL(3, "test%3d: superblock uncompressible data, too many nocompress superblocks : ", testNb++);
+    DISPLAYLEVEL(3, "test%3d : superblock uncompressible data: too many nocompress superblocks : ", testNb++);
     {
         ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         const BYTE* src = (BYTE*)CNBuffer; BYTE* dst = (BYTE*)compressedBuffer;

From 3b401000580a3e605694055834dcd254fa36202e Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 23 Feb 2024 15:35:12 -0800
Subject: [PATCH 204/283] fix long sequences (> 64 KB)

---
 lib/compress/zstd_compress_superblock.c | 13 ++++++++-----
 tests/Makefile                          |  2 +-
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 2bb2e220839..239072623d6 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -136,6 +136,7 @@ ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
     size_t matchLengthSum = 0;
     size_t litLengthSum = 0;
     (void)(litLengthSum); /* suppress unused variable warning on some environments */
+    DEBUGLOG(6, "ZSTD_seqDecompressedSize (%u sequences from %p) (last==%i)", (unsigned)nbSeq, sp, lastSequence);
     while (sp < send) {
         ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
         litLengthSum += seqLen.litLength;
@@ -279,7 +280,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
                 litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
     {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
                                                         &entropyMetadata->hufMetadata, literals, litSize,
-                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+                                                        op, (size_t)(oend-op),
+                                                        bmi2, writeLitEntropy, litEntropyWritten);
         FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
         if (cLitSize == 0) return 0;
         op += cLitSize;
@@ -289,7 +291,7 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
                                                   sequences, nbSeq,
                                                   llCode, mlCode, ofCode,
                                                   cctxParams,
-                                                  op, oend-op,
+                                                  op, (size_t)(oend-op),
                                                   bmi2, writeSeqEntropy, seqEntropyWritten);
         FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
         if (cSeqSize == 0) return 0;
@@ -419,13 +421,14 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
     return 0;
 }
 
-static size_t countLiterals(const seqDef* sp, size_t seqCount)
+static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
 {
     size_t n, total = 0;
     assert(sp != NULL);
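     for (n=0; n<seqCount; n++) {
-        total += sp[n].litLength;
+        total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
     }
+    DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, sp, total);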
     return total;
 }
 
@@ -504,7 +507,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
             int lastSubBlock = (n==nbSubBlocks-1);
             size_t const nbSeqsLastSubBlock = nbSeqs - (nbSubBlocks-1) * nbSeqsPerBlock;
             size_t seqCount = lastSubBlock ? nbSeqsLastSubBlock : nbSeqsPerBlock;
-            size_t litSize = lastSubBlock ? (size_t)(lend-lp) : countLiterals(sp, seqCount);
+            size_t litSize = lastSubBlock ? (size_t)(lend-lp) : countLiterals(seqStorePtr, sp, seqCount);
             int litEntropyWritten = 0;
             int seqEntropyWritten = 0;
             const size_t decompressedSize =
diff --git a/tests/Makefile b/tests/Makefile
index ed7638b743f..700c64b1d17 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -314,7 +314,7 @@ check: shortest
 fuzztest: test-fuzzer test-zstream test-decodecorpus
 
 .PHONY: test
-test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus test-cli-tests
+test: test-zstd test-cli-tests test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus
 ifeq ($(QEMU_SYS),)
 test: test-pool
 endif

From 0591e7eea118eccb6b8ceef00296bedaad3d7e9e Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 23 Feb 2024 16:05:09 -0800
Subject: [PATCH 205/283] minor: fix overly cautious conversion warning

---
 lib/compress/zstd_compress_superblock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 239072623d6..d31bed32ef6 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -136,7 +136,7 @@ ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
     size_t matchLengthSum = 0;
     size_t litLengthSum = 0;
     (void)(litLengthSum); /* suppress unused variable warning on some environments */
-    DEBUGLOG(6, "ZSTD_seqDecompressedSize (%u sequences from %p) (last==%i)", (unsigned)nbSeq, sp, lastSequence);
+    DEBUGLOG(6, "ZSTD_seqDecompressedSize (%u sequences from %p) (last==%i)", (unsigned)nbSeq, (const void*)sp, lastSequence);
     while (sp < send) {
         ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
         litLengthSum += seqLen.litLength;
@@ -428,7 +428,7 @@ static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t
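     for (n=0; n<seqCount; n++) {
         total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
     }
-    DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, sp, total);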
+    DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
     return total;
 }
 

From 6719794379ada9cc33cae486a6fea4930eda481c Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 23 Feb 2024 18:48:29 -0800
Subject: [PATCH 206/283] fixed some regressionTests

but not all
---
 lib/compress/zstd_compress_superblock.c | 13 ++++++++-----
 tests/fuzz/dictionary_round_trip.c      | 12 ++++++------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index d31bed32ef6..09219d007bc 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -467,7 +467,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const BYTE* ofCodePtr = seqStorePtr->ofCode;
     size_t const minTarget = 2 KB; /* enforce minimum size to avoid undesirable side effects */
     size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
-    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+    int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
     int writeSeqEntropy = 1;
     size_t nbSubBlocks = 1;
 
@@ -523,9 +523,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                                                lastBlock && lastSubBlock);
             FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
 
-            if (cSize == 0 || cSize >= decompressedSize) {
+            if (cSize == 0 || (cSize >= decompressedSize && n>0)) {
+                litEntropyWritten = 0;
+                seqEntropyWritten = 0;
                 cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, decompressedSize, lastBlock);
-                DEBUGLOG(5, "send an uncompressed sub-block of %u bytes", (unsigned)(decompressedSize));
+                DEBUGLOG(5, "Generate uncompressed sub-block of %u bytes", (unsigned)(decompressedSize));
                 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
                 assert(cSize != 0);
                 /* We have to regenerate the repcodes because we've skipped some sequences */
@@ -578,8 +580,9 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
 
 size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
                                void* dst, size_t dstCapacity,
-                               void const* src, size_t srcSize,
-                               unsigned lastBlock) {
+                               const void* src, size_t srcSize,
+                               unsigned lastBlock)
+{
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 
     FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c
index 06fdf24e9fb..0470fbf53ad 100644
--- a/tests/fuzz/dictionary_round_trip.c
+++ b/tests/fuzz/dictionary_round_trip.c
@@ -23,13 +23,13 @@
 #include "fuzz_data_producer.h"
 #include "fuzz_third_party_seq_prod.h"
 
-static ZSTD_CCtx *cctx = NULL;
-static ZSTD_DCtx *dctx = NULL;
+static ZSTD_CCtx* cctx = NULL;
+static ZSTD_DCtx* dctx = NULL;
 
-static size_t roundTripTest(void *result, size_t resultCapacity,
-                            void *compressed, size_t compressedCapacity,
-                            const void *src, size_t srcSize,
-                            FUZZ_dataProducer_t *producer)
+static size_t roundTripTest(void* result, size_t resultCapacity,
+                            void* compressed, size_t compressedCapacity,
+                            const void* src, size_t srcSize,
+                            FUZZ_dataProducer_t* producer)
 {
     ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
     FUZZ_dict_t dict = FUZZ_train(src, srcSize, producer);

From 4b5152641239c571ae6cd67ae74cc87776e21362 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 24 Feb 2024 01:24:58 -0800
Subject: [PATCH 207/283] fix partial block uncompressed

---
 lib/compress/zstd_compress_superblock.c | 122 +++++++++++++-----------
 lib/decompress/zstd_decompress.c        |  11 ++-
 lib/decompress/zstd_decompress_block.c  |   2 +-
 3 files changed, 73 insertions(+), 62 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 09219d007bc..b35d9ad07db 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -127,26 +127,25 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
 
 static size_t
 ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
-                   const seqDef* sequences, size_t nbSeq,
-                         size_t litSize, int lastSequence)
+                   const seqDef* sequences, size_t nbSeqs,
+                         size_t litSize, int lastSubBlock)
 {
-    const seqDef* const sstart = sequences;
-    const seqDef* const send = sequences + nbSeq;
-    const seqDef* sp = sstart;
     size_t matchLengthSum = 0;
     size_t litLengthSum = 0;
-    (void)(litLengthSum); /* suppress unused variable warning on some environments */
-    DEBUGLOG(6, "ZSTD_seqDecompressedSize (%u sequences from %p) (last==%i)", (unsigned)nbSeq, (const void*)sp, lastSequence);
-    while (sp < send) {
-        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+    size_t n;
+    for (n=0; n %zu bytes", seqCount, (const void*)sp, total);
+    DEBUGLOG(5, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
     return total;
 }
 
@@ -451,6 +450,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
 {
     const seqDef* const sstart = seqStorePtr->sequencesStart;
     const seqDef* const send = seqStorePtr->sequences;
+    const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
     size_t const nbSeqs = (size_t)(send - sstart);
     size_t nbSeqsPerBlock = nbSeqs;
     const BYTE* const lstart = seqStorePtr->litStart;
@@ -471,8 +471,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     int writeSeqEntropy = 1;
     size_t nbSubBlocks = 1;
 
-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
-                (unsigned)(lend-lp), (unsigned)(send-sstart));
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
+               (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
 
     if (nbSeqs == 0) {
         /* special case : no sequence */
@@ -502,17 +502,18 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
 
     /* write sub-blocks */
     {   size_t n;
+        size_t nbSeqsToProcess = 0;
         for (n=0; n < nbSubBlocks; n++) {
-            const seqDef* sp = sstart + n*nbSeqsPerBlock;
-            int lastSubBlock = (n==nbSubBlocks-1);
+            int const lastSubBlock = (n==nbSubBlocks-1);
             size_t const nbSeqsLastSubBlock = nbSeqs - (nbSubBlocks-1) * nbSeqsPerBlock;
-            size_t seqCount = lastSubBlock ? nbSeqsLastSubBlock : nbSeqsPerBlock;
+            size_t nbSeqsSubBlock = lastSubBlock ? nbSeqsLastSubBlock : nbSeqsPerBlock;
+            size_t seqCount = nbSeqsToProcess+nbSeqsSubBlock;
             size_t litSize = lastSubBlock ? (size_t)(lend-lp) : countLiterals(seqStorePtr, sp, seqCount);
             int litEntropyWritten = 0;
             int seqEntropyWritten = 0;
             const size_t decompressedSize =
                     ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSubBlock);
-            size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+            size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
                                                sp, seqCount,
                                                lp, litSize,
                                                llCodePtr, mlCodePtr, ofCodePtr,
@@ -521,59 +522,66 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                                                bmi2, writeLitEntropy, writeSeqEntropy,
                                                &litEntropyWritten, &seqEntropyWritten,
                                                lastBlock && lastSubBlock);
+            nbSeqsToProcess = seqCount;
             FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
 
-            if (cSize == 0 || (cSize >= decompressedSize && n>0)) {
-                litEntropyWritten = 0;
-                seqEntropyWritten = 0;
-                cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, decompressedSize, lastBlock);
-                DEBUGLOG(5, "Generate uncompressed sub-block of %u bytes", (unsigned)(decompressedSize));
-                FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
-                assert(cSize != 0);
-                /* We have to regenerate the repcodes because we've skipped some sequences */
-                if (sp < send) {
-                    seqDef const* seq;
-                    repcodes_t rep;
-                    ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
-                    for (seq = sstart; seq < sp; ++seq) {
-                        ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
-                    }
-                    ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
+            if (cSize > 0 && cSize < decompressedSize) {
+                DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
+                            (unsigned)decompressedSize, (unsigned)cSize);
+                assert(ip + decompressedSize <= iend);
+                ip += decompressedSize;
+                lp += litSize;
+                op += cSize;
+                llCodePtr += seqCount;
+                mlCodePtr += seqCount;
+                ofCodePtr += seqCount;
+                /* Entropy only needs to be written once */
+                if (litEntropyWritten) {
+                    writeLitEntropy = 0;
                 }
+                if (seqEntropyWritten) {
+                    writeSeqEntropy = 0;
+                }
+                sp += seqCount;
+                nbSeqsToProcess = 0;
             }
-
-            DEBUGLOG(5, "Committed the sub-block");
-            assert(ip + decompressedSize <= iend);
-            ip += decompressedSize;
-            sp += seqCount;
-            lp += litSize;
-            op += cSize;
-            llCodePtr += seqCount;
-            mlCodePtr += seqCount;
-            ofCodePtr += seqCount;
-            /* Entropy only needs to be written once */
-            if (litEntropyWritten) {
-                writeLitEntropy = 0;
-            }
-            if (seqEntropyWritten) {
-                writeSeqEntropy = 0;
-            }
+            /* otherwise : coalesce current block with next one */
         }
     }
 
     if (writeLitEntropy) {
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+        DEBUGLOG(5, "Literal entropy tables were never written");
         ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
     }
     if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
         /* If we haven't written our entropy tables, then we've violated our contract and
          * must emit an uncompressed block.
          */
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+        DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
         return 0;
     }
-    assert(ip == iend); (void)iend;
-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed: %u subBlocks, total compressed size = %u",
+
+    if (ip < iend) {
+        /* some data left : last part of the block sent uncompressed */
+        size_t const rSize = (size_t)((iend - ip));
+        size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
+        DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
+        FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+        assert(cSize != 0);
+        op += cSize;
+        /* We have to regenerate the repcodes because we've skipped some sequences */
+        if (sp < send) {
+            const seqDef* seq;
+            repcodes_t rep;
+            ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
+            for (seq = sstart; seq < sp; ++seq) {
+                ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+            }
+            ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
+        }
+    }
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed %u subBlocks: total compressed size = %u",
                 (unsigned)nbSubBlocks, (unsigned)(op-ostart));
     return (size_t)(op-ostart);
 }
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index f57b31e70f6..0d17b5e0737 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -18,6 +18,7 @@
  * on stack (0), or into heap (1, default; requires malloc()).
  * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
  */
+#include "error_private.h"
 #ifndef ZSTD_HEAPMODE
 #  define ZSTD_HEAPMODE 1
 #endif
@@ -1023,12 +1024,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         default:
             RETURN_ERROR(corruption_detected, "invalid block type");
         }
-
-        if (ZSTD_isError(decodedSize)) return decodedSize;
-        if (dctx->validateChecksum)
+        FORWARD_IF_ERROR(decodedSize, "Block decompression failure");
+        DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize);
+        if (dctx->validateChecksum) {
             XXH64_update(&dctx->xxhState, op, decodedSize);
-        if (decodedSize != 0)
+        }
+        if (decodedSize) /* support dst = NULL,0 */ {
             op += decodedSize;
+        }
         assert(ip != NULL);
         ip += cBlockSize;
         remainingSrcSize -= cBlockSize;
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index 4be145732d9..1943d7f574c 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -2073,7 +2073,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                         const void* src, size_t srcSize, const streaming_operation streaming)
 {   /* blockType == blockCompressed */
     const BYTE* ip = (const BYTE*)src;
-    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
 
     /* Note : the wording of the specification
      * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).

From f77f634d41149c3e5754ebfe4d5cf3a5f138c843 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 24 Feb 2024 01:28:17 -0800
Subject: [PATCH 208/283] update API documentation

---
 doc/zstd_manual.html             | 67 +++++++++++++++++++++-----------
 lib/decompress/zstd_decompress.c |  8 ++--
 2 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index d72eacc34cd..c81627d8768 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -156,7 +156,7 @@ 

Helper functions

/* ZSTD_compressBound() :
  * for example to size a static array on stack.
  * Will produce constant value 0 if srcSize too large.
  */
-#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00LLU : 0xFF00FF00U)
+#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
 #define ZSTD_COMPRESSBOUND(srcSize)   (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
 size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
 /* ZSTD_isError() :
@@ -513,6 +513,7 @@ 

Decompression context

  When decompressing many times,
      * ZSTD_d_forceIgnoreChecksum
      * ZSTD_d_refMultipleDDicts
      * ZSTD_d_disableHuffmanAssembly
+     * ZSTD_d_maxBlockSize
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly
      */
@@ -520,7 +521,8 @@ 

Decompression context

  When decompressing many times,
      ZSTD_d_experimentalParam2=1001,
      ZSTD_d_experimentalParam3=1002,
      ZSTD_d_experimentalParam4=1003,
-     ZSTD_d_experimentalParam5=1004
+     ZSTD_d_experimentalParam5=1004,
+     ZSTD_d_experimentalParam6=1005
 
 } ZSTD_dParameter;
 

@@ -1386,58 +1388,61 @@

Streaming decompression functions


Memory management


 
-
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
 

These functions make it possible to estimate memory usage of a future {D,C}Ctx, before its creation. + This is useful in combination with ZSTD_initStatic(), + which makes it possible to employ a static buffer for ZSTD_CCtx* state. ZSTD_estimateCCtxSize() will provide a memory budget large enough - for any compression level up to selected one. - Note : Unlike ZSTD_estimateCStreamSize*(), this estimate - does not include space for a window buffer. - Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2() + associated with any compression level up to max specified one. The estimate will assume the input may be arbitrarily large, which is the worst case. + Note that the size estimation is specific for one-shot compression, + it is not valid for streaming (see ZSTD_estimateCStreamSize*()) + nor other potential ways of using a ZSTD_CCtx* state. + When srcSize can be bound by a known and rather "small" value, - this fact can be used to provide a tighter estimation - because the CCtx compression context will need less memory. - This tighter estimation can be provided by more advanced functions + this knowledge can be used to provide a tighter budget estimation + because the ZSTD_CCtx* state will need less memory for small inputs. + This tighter estimation can be provided by employing more advanced functions ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. Note : only single-threaded compression is supported. ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. 
- - Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time. - Size estimates assume that no external sequence producer is registered.


-
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
-

ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. - It will also consider src size to be arbitrarily "large", which is worst case. +

ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression + using any compression level up to the max specified one. + It will also consider src size to be arbitrarily "large", which is a worst case scenario. If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. Note : CStream size estimation is only correct for single-threaded compression. - ZSTD_DStream memory budget depends on window Size. + ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. + Size estimates assume that no external sequence producer is registered. + + ZSTD_DStream memory budget depends on frame's window Size. This information can be passed manually, using ZSTD_estimateDStreamSize, or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + Any frame requesting a window size larger than max specified one will be rejected. Note : if streaming is init with function ZSTD_init?Stream_usingDict(), an internal ?Dict will be created, which additional size is not estimated here. In this case, get total size by adding ZSTD_estimate?DictSize - Note 2 : only single-threaded compression is supported. - ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. 
- Size estimates assume that no external sequence producer is registered.


@@ -1926,7 +1931,7 @@

Advanced Streaming decompression functions

ZSTD_registerSequenceProducer( ZSTD_CCtx* cctx, void* sequenceProducerState, - ZSTD_sequenceProducer_F* sequenceProducer + ZSTD_sequenceProducer_F sequenceProducer );

Instruct zstd to use a block-level external sequence producer function. @@ -1948,6 +1953,22 @@

Advanced Streaming decompression functions

calling this function.


+
ZSTDLIB_STATIC_API void
+ZSTD_CCtxParams_registerSequenceProducer(
+  ZSTD_CCtx_params* params,
+  void* sequenceProducerState,
+  ZSTD_sequenceProducer_F sequenceProducer
+);
+

Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params. + This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(), + which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx(). + + If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx() + is required, then this function is for you. Otherwise, you probably don't need it. + + See tests/zstreamtest.c for example usage. +


+

Buffer-less and synchronous inner streaming functions (DEPRECATED)

   This API is deprecated, and will be removed in a future version.
   It allows streaming (de)compression with user allocated buffers.
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 0d17b5e0737..17305908540 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -18,7 +18,6 @@
  * on stack (0), or into heap (1, default; requires malloc()).
  * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
  */
-#include "error_private.h"
 #ifndef ZSTD_HEAPMODE
 #  define ZSTD_HEAPMODE 1
 #endif
@@ -56,18 +55,19 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
-#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/error_private.h"
+#include "../common/zstd_internal.h"  /* blockProperties_t */
 #include "../common/mem.h"         /* low level memory routines */
+#include "../common/bits.h"  /* ZSTD_highbit32 */
 #define FSE_STATIC_LINKING_ONLY
 #include "../common/fse.h"
 #include "../common/huf.h"
 #include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
-#include "../common/zstd_internal.h"  /* blockProperties_t */
 #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
 #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
 #include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
-#include "../common/bits.h"  /* ZSTD_highbit32 */
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
 #  include "../legacy/zstd_legacy.h"

From f8372191f595f112ba13445205cf46997da67350 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 24 Feb 2024 01:59:16 -0800
Subject: [PATCH 209/283] reduced minimum compressed block size

with the intention to match the transport layer size,
such as Ethernet and 4G mobile networks.
---
 lib/compress/zstd_compress_superblock.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index b35d9ad07db..824a2be6d5e 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -76,8 +76,8 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
     }
 
     {   int const flags = bmi2 ? HUF_flags_bmi2 : 0;
-        const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
-                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
+        const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
+                                          : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
         op += cSize;
         cLitSize += cSize;
         if (cSize == 0 || ERR_isError(cSize)) {
@@ -102,7 +102,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
     switch(lhSize)
     {
     case 3: /* 2 - 2 - 10 - 10 */
-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
             MEM_writeLE24(ostart, lhc);
             break;
         }
@@ -465,7 +465,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const BYTE* llCodePtr = seqStorePtr->llCode;
     const BYTE* mlCodePtr = seqStorePtr->mlCode;
     const BYTE* ofCodePtr = seqStorePtr->ofCode;
-    size_t const minTarget = 2 KB; /* enforce minimum size to avoid undesirable side effects */
+    size_t const minTarget = 1300; /* enforce minimum size, to reduce undesirable side effects */
     size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
     int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
     int writeSeqEntropy = 1;

From 4d2bf7f0f2feb2c6928204db218ff9384ac605ac Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 24 Feb 2024 23:03:40 -0800
Subject: [PATCH 210/283] removed sprintf usage from zstdcli.c

some static analyzers flag this standard C90 function as unsafe.
---
 programs/zstdcli.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 85d0e12d7d0..1cb16ef50e7 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -318,7 +318,7 @@ static void usageAdvanced(const char* programName)
 
 static void badUsage(const char* programName, const char* parameter)
 {
-    DISPLAYLEVEL(1, "Incorrect parameter: %s\n", parameter);
+    DISPLAYLEVEL(1, "Incorrect parameter: %s \n", parameter);
     if (g_displayLevel >= 2) usage(stderr, programName);
 }
 
@@ -1148,7 +1148,6 @@ int main(int argCount, const char* argv[])
 
             argument++;
             while (argument[0]!=0) {
-                char shortArgument[3];
 
 #ifndef ZSTD_NOCOMPRESS
                 /* compression Level */
@@ -1281,9 +1280,11 @@ int main(int argCount, const char* argv[])
 
                     /* unknown command */
                 default :
-                    sprintf(shortArgument, "-%c", argument[0]);
-                    badUsage(programName, shortArgument);
-                    CLEAN_RETURN(1);
+                    {   char shortArgument[3] = {'-', 0, 0};
+                        shortArgument[1] = argument[0];
+                        badUsage(programName, shortArgument);
+                        CLEAN_RETURN(1);
+                    }
                 }
             }
             continue;

From 038a8a906b8bbf60491b2643febaf8f9d5a4139c Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sun, 25 Feb 2024 17:33:41 -0800
Subject: [PATCH 211/283] targetCBlockSize: modified splitting strategy to
 generate blocks of more regular size

notably avoiding a larger first block
---
 lib/compress/zstd_compress_superblock.c | 166 +++++++++++++++++-------
 1 file changed, 119 insertions(+), 47 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 824a2be6d5e..a32616409e6 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -390,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
     return cSeqSizeEstimate + sequencesSectionHeaderSize;
 }
 
-static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+typedef struct {
+    size_t estLitSize;
+    size_t estBlockSize;
+} EstimatedBlockSize;
+static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
                                         const BYTE* ofCodeTable,
                                         const BYTE* llCodeTable,
                                         const BYTE* mlCodeTable,
@@ -398,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
                                         const ZSTD_entropyCTables_t* entropy,
                                         const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                         void* workspace, size_t wkspSize,
-                                        int writeLitEntropy, int writeSeqEntropy) {
-    size_t cSizeEstimate = 0;
-    cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
-                                                         &entropy->huf, &entropyMetadata->hufMetadata,
-                                                         workspace, wkspSize, writeLitEntropy);
-    cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                        int writeLitEntropy, int writeSeqEntropy)
+{
+    EstimatedBlockSize ebs;
+    ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
+                                                        &entropy->huf, &entropyMetadata->hufMetadata,
+                                                        workspace, wkspSize, writeLitEntropy);
+    ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
                                                          nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
                                                          workspace, wkspSize, writeSeqEntropy);
-    return cSizeEstimate + ZSTD_blockHeaderSize;
+    ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
+    return ebs;
 }
 
 static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
@@ -427,17 +433,43 @@ static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t
     for (n=0; n %zu bytes", seqCount, (const void*)sp, total);
+    DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
     return total;
 }
 
+#define BYTESCALE 256
+
+static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
+                size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
+                int firstSubBlock)
+{
+    size_t n, budget = 0;
+    /* entropy headers */
+    if (firstSubBlock) {
+        budget += 120 * BYTESCALE; /* generous estimate */
+    }
+    /* first sequence => at least one sequence*/
+    budget += sp[0].litLength * avgLitCost + avgSeqCost;;
+    if (budget > targetBudget) return 1;
+
+    /* loop over sequences */
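+    for (n=1; n<nbSeqs; n++) {
+        size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
+        if (budget + currentCost > targetBudget) break;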
+        budget += currentCost;
+    }
+    return n;
+}
+
+#define CBLOCK_TARGET_SIZE_MIN 1300 /* suitable to fit an ethernet / wifi / 4G transport frame */
+
 /** ZSTD_compressSubBlock_multi() :
  *  Breaks super-block into multiple sub-blocks and compresses them.
- *  Entropy will be written to the first block.
- *  The following blocks will use repeat mode to compress.
- *  All sub-blocks are compressed blocks (no raw or rle blocks).
- *  @return : compressed size of the super block (which is multiple ZSTD blocks)
- *            Or 0 if it failed to compress. */
+ *  Entropy will be written into the first block.
+ *  The following blocks use repeat_mode to compress.
+ *  Sub-blocks are all compressed, except the last one when beneficial.
+ *  @return : compressed size of the super block (which features multiple ZSTD blocks)
+ *            or 0 if it failed to compress. */
 static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                             const ZSTD_compressedBlockState_t* prevCBlock,
                             ZSTD_compressedBlockState_t* nextCBlock,
@@ -452,7 +484,6 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const seqDef* const send = seqStorePtr->sequences;
     const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
     size_t const nbSeqs = (size_t)(send - sstart);
-    size_t nbSeqsPerBlock = nbSeqs;
     const BYTE* const lstart = seqStorePtr->litStart;
     const BYTE* const lend = seqStorePtr->lit;
     const BYTE* lp = lstart;
@@ -465,54 +496,96 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const BYTE* llCodePtr = seqStorePtr->llCode;
     const BYTE* mlCodePtr = seqStorePtr->mlCode;
     const BYTE* ofCodePtr = seqStorePtr->ofCode;
-    size_t const minTarget = 1300; /* enforce minimum size, to reduce undesirable side effects */
+    size_t const minTarget = CBLOCK_TARGET_SIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
     size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
     int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
     int writeSeqEntropy = 1;
     size_t nbSubBlocks = 1;
+    size_t avgLitCost, avgSeqCost, avgBlockBudget;
 
     DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
                (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
 
+        /* let's start by a general estimation for the full block */
     if (nbSeqs == 0) {
-        /* special case : no sequence */
-        nbSeqsPerBlock = 0;
         nbSubBlocks = 1;
     } else {
-        /* let's start by a general estimation for the full block */
-        size_t const cBlockSizeEstimate =
+        EstimatedBlockSize const ebs =
                 ZSTD_estimateSubBlockSize(lp, nbLiterals,
                                         ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
                                         &nextCBlock->entropy, entropyMetadata,
                                         workspace, wkspSize,
                                         writeLitEntropy, writeSeqEntropy);
         /* quick estimation */
-        nbSubBlocks = (cBlockSizeEstimate + (targetCBlockSize-1)) / targetCBlockSize;
-        assert(nbSubBlocks > 0);
-        if (nbSeqs > nbSubBlocks) {
-            nbSeqsPerBlock = nbSeqs / nbSubBlocks;
-        } else {
-            nbSeqsPerBlock = 1;
-            nbSubBlocks = nbSeqs;
-        }
-        /* Note: this is very approximative. Obviously, some sub-blocks will be larger and others smaller.
-         * But the contract of this feature has always been approximative, so for now we'll leverage it for speed.
-         * It can be refined later, for closer-to-target compressed block size, if it ever matters. */
+        avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
+        avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
+        nbSubBlocks = (ebs.estBlockSize + (targetCBlockSize-1)) / targetCBlockSize;
+        if (nbSubBlocks<1) nbSubBlocks=1;
+        avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
+        DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
+                    (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
+                    (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
     }
 
-    /* write sub-blocks */
+    /* compress and write sub-blocks */
     {   size_t n;
-        size_t nbSeqsToProcess = 0;
-        for (n=0; n < nbSubBlocks; n++) {
-            int const lastSubBlock = (n==nbSubBlocks-1);
-            size_t const nbSeqsLastSubBlock = nbSeqs - (nbSubBlocks-1) * nbSeqsPerBlock;
-            size_t nbSeqsSubBlock = lastSubBlock ? nbSeqsLastSubBlock : nbSeqsPerBlock;
-            size_t seqCount = nbSeqsToProcess+nbSeqsSubBlock;
-            size_t litSize = lastSubBlock ? (size_t)(lend-lp) : countLiterals(seqStorePtr, sp, seqCount);
-            int litEntropyWritten = 0;
+        size_t blockBudgetSupp = 0;
+        for (n=0; n+1 < nbSubBlocks; n++) {
+            /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
+            size_t seqCount = sizeBlockSequences(sp, (size_t)(send-sp), avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
+            /* if reached last sequence : break to last sub-block (simplification) */
+            assert(seqCount <= (size_t)(send-sp));
+            if (sp + seqCount == send) break;
+            assert(seqCount > 0);
+            /* compress sub-block */
+            {   int litEntropyWritten = 0;
+                int seqEntropyWritten = 0;
+                size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
+                const size_t decompressedSize =
+                        ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
+                size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                                sp, seqCount,
+                                                lp, litSize,
+                                                llCodePtr, mlCodePtr, ofCodePtr,
+                                                cctxParams,
+                                                op, (size_t)(oend-op),
+                                                bmi2, writeLitEntropy, writeSeqEntropy,
+                                                &litEntropyWritten, &seqEntropyWritten,
+                                                0);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+
+                /* check compressibility, update state components */
+                if (cSize > 0 && cSize < decompressedSize) {
+                    DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
+                                (unsigned)decompressedSize, (unsigned)cSize);
+                    assert(ip + decompressedSize <= iend);
+                    ip += decompressedSize;
+                    lp += litSize;
+                    op += cSize;
+                    llCodePtr += seqCount;
+                    mlCodePtr += seqCount;
+                    ofCodePtr += seqCount;
+                    /* Entropy only needs to be written once */
+                    if (litEntropyWritten) {
+                        writeLitEntropy = 0;
+                    }
+                    if (seqEntropyWritten) {
+                        writeSeqEntropy = 0;
+                    }
+                    sp += seqCount;
+                    blockBudgetSupp = 0;
+            }   }
+            /* otherwise : do not compress yet, coalesce current block with next one */
+        }
+
+        /* write last block */
+        DEBUGLOG(2, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
+        {   int litEntropyWritten = 0;
             int seqEntropyWritten = 0;
+            size_t litSize = (size_t)(lend - lp);
+            size_t seqCount = (size_t)(send - sp);
             const size_t decompressedSize =
-                    ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSubBlock);
+                    ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
             size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
                                                sp, seqCount,
                                                lp, litSize,
@@ -521,12 +594,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                                                op, (size_t)(oend-op),
                                                bmi2, writeLitEntropy, writeSeqEntropy,
                                                &litEntropyWritten, &seqEntropyWritten,
-                                               lastBlock && lastSubBlock);
-            nbSeqsToProcess = seqCount;
+                                               lastBlock);
             FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
 
+            /* update pointers, the nb of literals borrowed from next sequence must be preserved */
             if (cSize > 0 && cSize < decompressedSize) {
-                DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
+                DEBUGLOG(2, "Last sub-block compressed %u bytes => %u bytes",
                             (unsigned)decompressedSize, (unsigned)cSize);
                 assert(ip + decompressedSize <= iend);
                 ip += decompressedSize;
@@ -543,9 +616,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                     writeSeqEntropy = 0;
                 }
                 sp += seqCount;
-                nbSeqsToProcess = 0;
+                blockBudgetSupp = 0;
             }
-            /* otherwise : coalesce current block with next one */
         }
     }
 
@@ -565,7 +637,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         /* some data left : last part of the block sent uncompressed */
         size_t const rSize = (size_t)((iend - ip));
         size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
-        DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
+        DEBUGLOG(2, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
         FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
         assert(cSize != 0);
         op += cSize;

From 1fafd0c4ae56a524a92369c065d616a447a21a0f Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sun, 25 Feb 2024 19:45:32 -0800
Subject: [PATCH 212/283] fix minor visual static analyzer warning

it's a false positive,
but change the code nonetheless to make it more obvious to the static analyzer.
---
 lib/compress/zstd_compress_superblock.c | 100 +++++++++++-------------
 1 file changed, 47 insertions(+), 53 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index a32616409e6..5d0d23353cc 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -500,16 +500,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
     int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
     int writeSeqEntropy = 1;
-    size_t nbSubBlocks = 1;
-    size_t avgLitCost, avgSeqCost, avgBlockBudget;
 
     DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
                (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
 
         /* let's start by a general estimation for the full block */
-    if (nbSeqs == 0) {
-        nbSubBlocks = 1;
-    } else {
+    if (nbSeqs > 0) {
         EstimatedBlockSize const ebs =
                 ZSTD_estimateSubBlockSize(lp, nbLiterals,
                                         ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
@@ -517,19 +513,17 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                                         workspace, wkspSize,
                                         writeLitEntropy, writeSeqEntropy);
         /* quick estimation */
-        avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
-        avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
-        nbSubBlocks = (ebs.estBlockSize + (targetCBlockSize-1)) / targetCBlockSize;
+        size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
+        size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
+        size_t nbSubBlocks = (ebs.estBlockSize + (targetCBlockSize-1)) / targetCBlockSize;
+        size_t n, avgBlockBudget, blockBudgetSupp=0;
         if (nbSubBlocks<1) nbSubBlocks=1;
         avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
         DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
                     (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
                     (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
-    }
 
     /* compress and write sub-blocks */
-    {   size_t n;
-        size_t blockBudgetSupp = 0;
         for (n=0; n+1 < nbSubBlocks; n++) {
             /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
             size_t seqCount = sizeBlockSequences(sp, (size_t)(send-sp), avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
@@ -577,50 +571,50 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
             }   }
             /* otherwise : do not compress yet, coalesce current block with next one */
         }
-
-        /* write last block */
-        DEBUGLOG(2, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
-        {   int litEntropyWritten = 0;
-            int seqEntropyWritten = 0;
-            size_t litSize = (size_t)(lend - lp);
-            size_t seqCount = (size_t)(send - sp);
-            const size_t decompressedSize =
-                    ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
-            size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
-                                               sp, seqCount,
-                                               lp, litSize,
-                                               llCodePtr, mlCodePtr, ofCodePtr,
-                                               cctxParams,
-                                               op, (size_t)(oend-op),
-                                               bmi2, writeLitEntropy, writeSeqEntropy,
-                                               &litEntropyWritten, &seqEntropyWritten,
-                                               lastBlock);
-            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
-
-            /* update pointers, the nb of literals borrowed from next sequence must be preserved */
-            if (cSize > 0 && cSize < decompressedSize) {
-                DEBUGLOG(2, "Last sub-block compressed %u bytes => %u bytes",
-                            (unsigned)decompressedSize, (unsigned)cSize);
-                assert(ip + decompressedSize <= iend);
-                ip += decompressedSize;
-                lp += litSize;
-                op += cSize;
-                llCodePtr += seqCount;
-                mlCodePtr += seqCount;
-                ofCodePtr += seqCount;
-                /* Entropy only needs to be written once */
-                if (litEntropyWritten) {
-                    writeLitEntropy = 0;
-                }
-                if (seqEntropyWritten) {
-                    writeSeqEntropy = 0;
-                }
-                sp += seqCount;
-                blockBudgetSupp = 0;
+    } /* if (nbSeqs > 0) */
+
+    /* write last block */
+    DEBUGLOG(2, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
+    {   int litEntropyWritten = 0;
+        int seqEntropyWritten = 0;
+        size_t litSize = (size_t)(lend - lp);
+        size_t seqCount = (size_t)(send - sp);
+        const size_t decompressedSize =
+                ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
+        size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                            sp, seqCount,
+                                            lp, litSize,
+                                            llCodePtr, mlCodePtr, ofCodePtr,
+                                            cctxParams,
+                                            op, (size_t)(oend-op),
+                                            bmi2, writeLitEntropy, writeSeqEntropy,
+                                            &litEntropyWritten, &seqEntropyWritten,
+                                            lastBlock);
+        FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+
+        /* update pointers, the nb of literals borrowed from next sequence must be preserved */
+        if (cSize > 0 && cSize < decompressedSize) {
+            DEBUGLOG(2, "Last sub-block compressed %u bytes => %u bytes",
+                        (unsigned)decompressedSize, (unsigned)cSize);
+            assert(ip + decompressedSize <= iend);
+            ip += decompressedSize;
+            lp += litSize;
+            op += cSize;
+            llCodePtr += seqCount;
+            mlCodePtr += seqCount;
+            ofCodePtr += seqCount;
+            /* Entropy only needs to be written once */
+            if (litEntropyWritten) {
+                writeLitEntropy = 0;
             }
+            if (seqEntropyWritten) {
+                writeSeqEntropy = 0;
+            }
+            sp += seqCount;
         }
     }
 
+
     if (writeLitEntropy) {
         DEBUGLOG(5, "Literal entropy tables were never written");
         ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
@@ -653,8 +647,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         }
     }
 
-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed %u subBlocks: total compressed size = %u",
-                (unsigned)nbSubBlocks, (unsigned)(op-ostart));
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
+                (unsigned)(op-ostart));
     return (size_t)(op-ostart);
 }
 

From a412bedb3f63a5bbb88601c0ab085a8eb0c39e48 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 26 Feb 2024 05:59:24 +0000
Subject: [PATCH 213/283] Bump github/codeql-action from 3.23.0 to 3.24.5

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.23.0 to 3.24.5.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/e5f05b81d5b6ff8cfa111c80c22c5fd02a384118...47b3d888fe66b639e431abf22ebca059152f1eea)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/scorecards.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 0ccd87625bc..fce0784e31f 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -59,6 +59,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@e5f05b81d5b6ff8cfa111c80c22c5fd02a384118 # tag=v3.23.0
+        uses: github/codeql-action/upload-sarif@47b3d888fe66b639e431abf22ebca059152f1eea # tag=v3.24.5
         with:
           sarif_file: results.sarif

From e0412c20625c7358d506c969a9c9861b70eb10ee Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 26 Feb 2024 12:26:54 -0800
Subject: [PATCH 214/283] fix extraneous semicolon ';'

as reported by @terrelln
---
 lib/compress/zstd_compress_superblock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 5d0d23353cc..32bea370f36 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -449,7 +449,7 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
         budget += 120 * BYTESCALE; /* generous estimate */
     }
     /* first sequence => at least one sequence*/
-    budget += sp[0].litLength * avgLitCost + avgSeqCost;;
+    budget += sp[0].litLength * avgLitCost + avgSeqCost;
     if (budget > targetBudget) return 1;
 
     /* loop over sequences */

From aa8592c532e1a2b30b08763140b9bd66bdce4f83 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 26 Feb 2024 13:21:14 -0800
Subject: [PATCH 215/283] minor: reformulate nbSubBlocks assignment

---
 lib/compress/zstd_compress_superblock.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 32bea370f36..cf3cb436228 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -461,7 +461,7 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
     return n;
 }
 
-#define CBLOCK_TARGET_SIZE_MIN 1300 /* suitable to fit an ethernet / wifi / 4G transport frame */
+#define CBLOCK_TARGET_SIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
 
 /** ZSTD_compressSubBlock_multi() :
  *  Breaks super-block into multiple sub-blocks and compresses them.
@@ -515,9 +515,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         /* quick estimation */
         size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
         size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
-        size_t nbSubBlocks = (ebs.estBlockSize + (targetCBlockSize-1)) / targetCBlockSize;
+        const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
         size_t n, avgBlockBudget, blockBudgetSupp=0;
-        if (nbSubBlocks<1) nbSubBlocks=1;
         avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
         DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
                     (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,

From ef82b214ad1023f6123c3d9c9a7dbce24130d9bd Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 26 Feb 2024 13:23:59 -0800
Subject: [PATCH 216/283] nit: comment indentation

as reported by @terrelln
---
 lib/compress/zstd_compress_superblock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index cf3cb436228..e9038c472c0 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -522,7 +522,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                     (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
                     (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
 
-    /* compress and write sub-blocks */
+        /* compress and write sub-blocks */
         for (n=0; n+1 < nbSubBlocks; n++) {
             /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
             size_t seqCount = sizeBlockSequences(sp, (size_t)(send-sp), avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);

From 86db60752d1f813642054d12d704663c7757d434 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 26 Feb 2024 13:27:59 -0800
Subject: [PATCH 217/283] optimization: bail out faster in presence of
 incompressible data

---
 lib/compress/zstd_compress_superblock.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index e9038c472c0..a9e9493bed3 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -521,6 +521,9 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
                     (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
                     (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
+        /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
+         * this will result in the production of a single uncompressed block covering @srcSize.*/
+        if (ebs.estBlockSize > srcSize) return 0;
 
         /* compress and write sub-blocks */
         for (n=0; n+1 < nbSubBlocks; n++) {
@@ -568,7 +571,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                     sp += seqCount;
                     blockBudgetSupp = 0;
             }   }
-            /* otherwise : do not compress yet, coalesce current block with next one */
+            /* otherwise : do not compress yet, coalesce current sub-block with following one */
         }
     } /* if (nbSeqs > 0) */
 

From d23b95d21d5cb9c5378b3537271dbbff7cdb49b7 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 26 Feb 2024 14:06:34 -0800
Subject: [PATCH 218/283] minor refactor for clarity

since we can ensure that nbSubBlocks>0
---
 lib/compress/zstd_compress_superblock.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index a9e9493bed3..295ccf30498 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -526,9 +526,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         if (ebs.estBlockSize > srcSize) return 0;
 
         /* compress and write sub-blocks */
-        for (n=0; n+1 < nbSubBlocks; n++) {
+        assert(nbSubBlocks>0);
+        for (n=0; n < nbSubBlocks-1; n++) {
             /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
-            size_t seqCount = sizeBlockSequences(sp, (size_t)(send-sp), avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
+            size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
+                                        avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
             /* if reached last sequence : break to last sub-block (simplification) */
             assert(seqCount <= (size_t)(send-sp));
             if (sp + seqCount == send) break;

From 8d31e8ec42a736bf7cc70f9f21e9c1afc920c148 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 26 Feb 2024 14:31:12 -0800
Subject: [PATCH 219/283] sizeBlockSequences() also tracks uncompressed size

and only defines a sub-block boundary when
it believes that it is compressible.

It's effectively an optimization,
avoiding a compression cycle to reach the same conclusion.
---
 lib/compress/zstd_compress_superblock.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 295ccf30498..f5430eccb0e 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -443,21 +443,29 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
                 size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
                 int firstSubBlock)
 {
-    size_t n, budget = 0;
+    size_t n, budget = 0, inSize=0;
     /* entropy headers */
-    if (firstSubBlock) {
-        budget += 120 * BYTESCALE; /* generous estimate */
-    }
+    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+    assert(firstSubBlock==0 || firstSubBlock==1);
+    budget += headerSize;
+
     /* first sequence => at least one sequence*/
     budget += sp[0].litLength * avgLitCost + avgSeqCost;
     if (budget > targetBudget) return 1;
+    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
 
     /* loop over sequences */
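     for (n=1; n<nbSeqs; n++) {
         size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
-        if (budget + currentCost > targetBudget) break;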
         budget += currentCost;
+        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+        /* stop when sub-block budget is reached */
+        if ( (budget > targetBudget)
+            /* though continue to expand until the sub-block is deemed compressible */
+          && (budget < inSize * BYTESCALE) )
+            break;
     }
+
     return n;
 }
 

From dcd713ce06fd9729e2e1eefa079be866f5e2f519 Mon Sep 17 00:00:00 2001
From: Theodore Tsirpanis 
Date: Tue, 27 Feb 2024 23:39:59 +0200
Subject: [PATCH 220/283] Define the unified target inside the CMake project,
 and export it.

This is less error-prone.
---
 build/cmake/lib/CMakeLists.txt  | 10 ++++++++++
 build/cmake/zstdConfig.cmake.in | 14 --------------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index 2b64a07e2f8..7e16470fde2 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -145,6 +145,16 @@ if (ZSTD_BUILD_STATIC)
     add_definition(libzstd_static ZSTDLIB_STATIC_API)
     add_definition(libzstd_static ZDICTLIB_STATIC_API)
 endif ()
+if (ZSTD_BUILD_SHARED AND NOT ZSTD_BUILD_STATIC)
+    add_library(libzstd INTERFACE)
+    target_link_libraries(libzstd INTERFACE libzstd_shared)
+    list(APPEND library_targets libzstd)
+endif ()
+if (ZSTD_BUILD_STATIC AND NOT ZSTD_BUILD_SHARED)
+    add_library(libzstd INTERFACE)
+    target_link_libraries(libzstd INTERFACE libzstd_static)
+    list(APPEND library_targets libzstd)
+endif ()
 
 # Add specific compile definitions for MSVC project
 if (MSVC)
diff --git a/build/cmake/zstdConfig.cmake.in b/build/cmake/zstdConfig.cmake.in
index 0a7f773d75f..f4190f989ba 100644
--- a/build/cmake/zstdConfig.cmake.in
+++ b/build/cmake/zstdConfig.cmake.in
@@ -7,18 +7,4 @@ endif()
 
 include("${CMAKE_CURRENT_LIST_DIR}/zstdTargets.cmake")
 
-if(NOT TARGET zstd::libzstd)
-  if(@ZSTD_BUILD_SHARED@ AND NOT @ZSTD_BUILD_STATIC@)
-    add_library(zstd::libzstd INTERFACE IMPORTED)
-    set_target_properties(zstd::libzstd PROPERTIES
-      INTERFACE_LINK_LIBRARIES "zstd::libzstd_shared"
-    )
-  elseif(NOT @ZSTD_BUILD_SHARED@ AND @ZSTD_BUILD_STATIC@)
-    add_library(zstd::libzstd INTERFACE IMPORTED)
-    set_target_properties(zstd::libzstd PROPERTIES
-      INTERFACE_LINK_LIBRARIES "zstd::libzstd_static"
-    )
-  endif()
-endif()
-
 check_required_components("zstd")

From bb4f85db42925a1dd129e733d3413316ebd5c9bb Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sun, 3 Mar 2024 18:47:08 -0800
Subject: [PATCH 221/283] fix version of actions/checkout

---
 .github/workflows/dev-long-tests.yml  | 50 +++++++++---------
 .github/workflows/dev-short-tests.yml | 74 +++++++++++++--------------
 2 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index 39910053136..21882f024b8 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -15,7 +15,7 @@ jobs:
   make-all:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: make all
       run: make all
 
@@ -26,7 +26,7 @@ jobs:
       DEVNULLRIGHTS: 1
       READFROMBLOCKDEVICE: 1
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: make test
       run: make test
 
@@ -34,7 +34,7 @@ jobs:
   make-test-osx:
     runs-on: macos-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: OS-X test
       run: make test # make -c lib all doesn't work because of the fact that it's not a tty
 
@@ -45,7 +45,7 @@ jobs:
       DEVNULLRIGHTS: 1
       READFROMBLOCKDEVICE: 1
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: make test
       run: |
         sudo apt-get -qqq update
@@ -55,21 +55,21 @@ jobs:
   no-intrinsics-fuzztest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: no intrinsics fuzztest
       run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest
 
   tsan-zstreamtest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: thread sanitizer zstreamtest
       run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream
 
   ubsan-zstreamtest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: undefined behavior sanitizer zstreamtest
       run: CC=clang make uasan-test-zstream
 
@@ -77,7 +77,7 @@ jobs:
   tsan-fuzztest:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: thread sanitizer fuzztest
       run: CC=clang make tsan-fuzztest
 
@@ -85,7 +85,7 @@ jobs:
   big-tests-zstreamtest32:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: zstream tests in 32bit mode, with big tests
       run: |
         sudo apt-get -qqq update
@@ -96,7 +96,7 @@ jobs:
   gcc-8-asan-ubsan-testzstd:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: gcc-8 + ASan + UBSan + Test Zstd
       # See https://askubuntu.com/a/1428822
       run: |
@@ -108,14 +108,14 @@ jobs:
   clang-asan-ubsan-testzstd:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: clang + ASan + UBSan + Test Zstd
       run: CC=clang make -j uasan-test-zstd 
Date: Mon, 4 Mar 2024 05:34:30 +0000
Subject: [PATCH 222/283] Bump github/codeql-action from 3.24.5 to 3.24.6

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.24.5 to 3.24.6.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/47b3d888fe66b639e431abf22ebca059152f1eea...8a470fddafa5cbb6266ee11b37ef4d8aae19c571)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
---
 .github/workflows/scorecards.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 18b0db45d64..513320271de 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -59,6 +59,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@47b3d888fe66b639e431abf22ebca059152f1eea # tag=v3.24.5
+        uses: github/codeql-action/upload-sarif@8a470fddafa5cbb6266ee11b37ef4d8aae19c571 # tag=v3.24.6
         with:
           sarif_file: results.sarif

From 4fb0a77314cabc65eb90895fae35a7f38ace560d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sun, 3 Mar 2024 23:24:40 -0800
Subject: [PATCH 223/283] update -V documentation

to answer #3727 comment
---
 programs/zstd.1.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 078455f3e4e..c5d0ef70a36 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -290,10 +290,11 @@ the last one takes effect.
 * `-h`/`-H`, `--help`:
     display help/long help and exit
 * `-V`, `--version`:
-    display version number and exit.
+    display version number and immediately exit.
+    note that, since it exits, flags specified after `-V` are effectively ignored.
     Advanced: `-vV` also displays supported formats.
     `-vvV` also displays POSIX support.
-    `-q` will only display the version number, suitable for machine reading.
+    `-qV` will only display the version number, suitable for machine reading.
 * `-v`, `--verbose`:
     verbose mode, display more information
 * `-q`, `--quiet`:

From 2abe8d63e06f0e7c9adacd50855a05023e51f1e0 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 4 Mar 2024 00:16:01 -0800
Subject: [PATCH 224/283] fix LLU->ULL

LLU is a correct suffix according to C99 & C11 standards (but not C90).
However, older versions of Visual Studio do not work with it.
Replace it with ULL, which doesn't have this issue.

Fixes https://github.com/facebook/zstd/issues/3647
---
 tests/fuzzer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 6c1f58df7db..09349218111 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -531,7 +531,7 @@ static void test_decompressBound(unsigned tnb)
             CHECK_EQ( ZSTD_flushStream(cctx, &out), 0 );
         }
         CHECK_EQ( ZSTD_endStream(cctx, &out), 0 );
-        CHECK( ZSTD_decompressBound(outBuffer, out.pos) > 0x100000000LLU /* 4 GB */ );
+        CHECK( ZSTD_decompressBound(outBuffer, out.pos) > 0x100000000ULL /* 4 GB */ );
         ZSTD_freeCCtx(cctx);
         free(outBuffer);
     }

From 007cda88ca1c7819eec966ce030934756d33c8c1 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 7 Mar 2024 16:43:13 -0800
Subject: [PATCH 225/283] prevent XXH64 from being autovectorized by AVX512 by
 default

backport fix https://github.com/Cyan4973/xxHash/pull/924 from libxxhash
---
 lib/common/xxhash.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index e5ed3dc0443..d95bad19166 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -3317,6 +3317,23 @@ static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
     acc += input * XXH_PRIME64_2;
     acc  = XXH_rotl64(acc, 31);
     acc *= XXH_PRIME64_1;
+#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+    /*
+     * DISABLE AUTOVECTORIZATION:
+     * A compiler fence is used to prevent GCC and Clang from
+     * autovectorizing the XXH64 loop (pragmas and attributes don't work for some
+     * reason) without globally disabling AVX512.
+     *
+     * Autovectorization of XXH64 tends to be detrimental,
+     * though the exact outcome may change depending on exact cpu and compiler version.
+     * For information, it has been reported as detrimental for Skylake-X,
+     * but possibly beneficial for Zen4.
+     *
+     * The default is to disable auto-vectorization,
+     * but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable.
+     */
+    XXH_COMPILER_GUARD(acc);
+#endif
     return acc;
 }
 

From ad590275b482d4c561bdc58418ef6b6a1db80c25 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 7 Mar 2024 16:54:44 -0800
Subject: [PATCH 226/283] added RISC-V emulation tests on Github CI

---
 .github/workflows/dev-short-tests.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index a781c25d004..9c36386a13f 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -380,6 +380,7 @@ jobs:
           { name: PPC64LE,  xcc_pkg: gcc-powerpc64le-linux-gnu, xcc: powerpc64le-linux-gnu-gcc, xemu_pkg: qemu-system-ppc,    xemu: qemu-ppc64le-static },
           { name: S390X,    xcc_pkg: gcc-s390x-linux-gnu,       xcc: s390x-linux-gnu-gcc,       xemu_pkg: qemu-system-s390x,  xemu: qemu-s390x-static   },
           { name: MIPS,     xcc_pkg: gcc-mips-linux-gnu,        xcc: mips-linux-gnu-gcc,        xemu_pkg: qemu-system-mips,   xemu: qemu-mips-static    },
+          { name: RISC-V,   xcc_pkg: gcc-riscv64-linux-gnu,     xcc: riscv64-linux-gnu-gcc,     xemu_pkg: qemu-system-riscv64,xemu: qemu-riscv64-static },
           { name: M68K,     xcc_pkg: gcc-m68k-linux-gnu,        xcc: m68k-linux-gnu-gcc,        xemu_pkg: qemu-system-m68k,   xemu: qemu-m68k-static    },
           { name: SPARC,    xcc_pkg: gcc-sparc64-linux-gnu,     xcc: sparc64-linux-gnu-gcc,     xemu_pkg: qemu-system-sparc,  xemu: qemu-sparc64-static },
         ]
@@ -424,6 +425,10 @@ jobs:
       if: ${{ matrix.name == 'MIPS' }}
       run: |
         LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
+    - name: RISC-V
+      if: ${{ matrix.name == 'RISC-V' }}
+      run: |
+        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
     - name: M68K
       if: ${{ matrix.name == 'M68K' }}
       run: |

From aed172a8fe84caccc86e5f27999a309d1df47c00 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 8 Mar 2024 14:29:44 -0800
Subject: [PATCH 227/283] minor: fix incorrect debug level

---
 lib/compress/zstd_compress_superblock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index f5430eccb0e..8c466c47952 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -586,7 +586,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     } /* if (nbSeqs > 0) */
 
     /* write last block */
-    DEBUGLOG(2, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
+    DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
     {   int litEntropyWritten = 0;
         int seqEntropyWritten = 0;
         size_t litSize = (size_t)(lend - lp);
@@ -606,7 +606,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
 
         /* update pointers, the nb of literals borrowed from next sequence must be preserved */
         if (cSize > 0 && cSize < decompressedSize) {
-            DEBUGLOG(2, "Last sub-block compressed %u bytes => %u bytes",
+            DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
                         (unsigned)decompressedSize, (unsigned)cSize);
             assert(ip + decompressedSize <= iend);
             ip += decompressedSize;
@@ -643,7 +643,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         /* some data left : last part of the block sent uncompressed */
         size_t const rSize = (size_t)((iend - ip));
         size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
-        DEBUGLOG(2, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
+        DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
         FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
         assert(cSize != 0);
         op += cSize;

From a9fb8d4c41bf3cc829adf20aea3768863d03cd0d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 8 Mar 2024 14:55:38 -0800
Subject: [PATCH 228/283] new method to deal with offset==0

in this new method, when an `offset==0` is detected,
it's converted into (size_t)(-1), instead of 1.

The logic is that (size_t)(-1) is effectively an extremely large positive number,
which will not pass the offset distance test at next stage (`execSequence()`).
Checked the source code, and offset is always checked (as it should),
using a formula which is not vulnerable to arithmetic overflow:
```
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart),
```

The benefit is that such a case (offset==0) is always detected as corrupted data
as opposed to relying on the checksum to detect the error.
---
 lib/decompress/zstd_decompress_block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index 8d9fea5fd81..76d7332e888 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -1305,7 +1305,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
                 } else {
                     offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
                     {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                        temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
                         if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                         seqState->prevOffset[1] = seqState->prevOffset[0];
                         seqState->prevOffset[0] = offset = temp;

From d2f56ba44208f56b5370a9ef6ce0d2c32f283131 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 8 Mar 2024 15:55:30 -0800
Subject: [PATCH 229/283] update documentation

---
 doc/decompressor_accepted_invalid_data.md | 14 -----
 doc/decompressor_permissive.md            | 62 +++++++++++++++++++++++
 2 files changed, 62 insertions(+), 14 deletions(-)
 delete mode 100644 doc/decompressor_accepted_invalid_data.md
 create mode 100644 doc/decompressor_permissive.md

diff --git a/doc/decompressor_accepted_invalid_data.md b/doc/decompressor_accepted_invalid_data.md
deleted file mode 100644
index f08f963d93e..00000000000
--- a/doc/decompressor_accepted_invalid_data.md
+++ /dev/null
@@ -1,14 +0,0 @@
-Decompressor Accepted Invalid Data
-==================================
-
-This document describes the behavior of the reference decompressor in cases
-where it accepts an invalid frame instead of reporting an error.
-
-Zero offsets converted to 1
----------------------------
-If a sequence is decoded with `literals_length = 0` and `offset_value = 3`
-while `Repeated_Offset_1 = 1`, the computed offset will be `0`, which is
-invalid.
-
-The reference decompressor will process this case as if the computed
-offset was `1`, including inserting `1` into the repeated offset list.
\ No newline at end of file
diff --git a/doc/decompressor_permissive.md b/doc/decompressor_permissive.md
new file mode 100644
index 00000000000..29846c31a1b
--- /dev/null
+++ b/doc/decompressor_permissive.md
@@ -0,0 +1,62 @@
+Decompressor Permissiveness to Invalid Data
+===========================================
+
+This document describes the behavior of the reference decompressor in cases
+where it accepts formally invalid data instead of reporting an error.
+
+While the reference decompressor *must* decode any compliant frame following
+the specification, its ability to detect erroneous data is on a best effort
+basis: the decoder may accept input data that would be formally invalid,
+when it causes no risk to the decoder, and which detection would cost too much
+complexity or speed regression.
+
+In practice, the vast majority of invalid data are detected, if only because
+many corruption events are dangerous for the decoder process (such as
+requesting an out-of-bound memory access) and many more are easy to check.
+
+This document lists a few known cases where invalid data was formerly accepted
+by the decoder, and what has changed since.
+
+
+Offset == 0
+-----------
+
+**Last affected version**: v1.5.5
+
+**Produced by the reference compressor**: No
+
+**Example Frame**: `28b5 2ffd 2000 1500 0000 00`
+
+If a sequence is decoded with `literals_length = 0` and `offset_value = 3`
+while `Repeated_Offset_1 = 1`, the computed offset will be `0`, which is
+invalid.
+
+The reference decompressor up to v1.5.5 processes this case as if the computed
+offset was `1`, including inserting `1` into the repeated offset list.
+This prevents the output buffer from remaining uninitialized, thus denying a
+potential attack vector from an untrusted source.
+However, in the rare case where this scenario would be the outcome of a
+transmission or storage error, the decoder relies on the checksum to detect
+the error.
+
+In newer versions, this case is always detected and reported as a corruption error.
+
+
+Non-zeroes reserved bits
+------------------------
+
+**Last affected version**: v1.5.5
+
+**Produced by the reference compressor**: No
+
+**Example Frame**: `28b5 2ffd 2000 1500 0000 00`
+
+The Sequences section of each block has a header, and one of its elements is a
+byte, which describes the compression mode of each symbol.
+This byte contains 2 reserved bits which must be set to zero.
+
+The reference decompressor up to v1.5.5 just ignores these 2 bits.
+This behavior has no consequence for the rest of the frame decoding process.
+
+In newer versions, the 2 reserved bits are actively checked for value zero,
+and the decoder reports a corruption error if they are not.

From eb5f7a7fa278ab76c3390555f36162c638f63b53 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Sat, 9 Mar 2024 00:33:44 -0800
Subject: [PATCH 230/283] produced golden sample for the offset==0 decoder test

The sample is correctly detected as corrupted by the new version,
and is accepted (changed into offset==1) by older versions.

Updated documentation accordingly, with a hexadecimal representation.
---
 doc/decompressor_permissive.md                 |   4 +---
 tests/golden-decompression-errors/off0.bin.zst | Bin 0 -> 17 bytes
 2 files changed, 1 insertion(+), 3 deletions(-)
 create mode 100644 tests/golden-decompression-errors/off0.bin.zst

diff --git a/doc/decompressor_permissive.md b/doc/decompressor_permissive.md
index 29846c31a1b..bd77165f0e3 100644
--- a/doc/decompressor_permissive.md
+++ b/doc/decompressor_permissive.md
@@ -25,7 +25,7 @@ Offset == 0
 
 **Produced by the reference compressor**: No
 
-**Example Frame**: `28b5 2ffd 2000 1500 0000 00`
+**Example Frame**: `28b5 2ffd 0000 4500 0008 0002 002f 430b ae`
 
 If a sequence is decoded with `literals_length = 0` and `offset_value = 3`
 while `Repeated_Offset_1 = 1`, the computed offset will be `0`, which is
@@ -49,8 +49,6 @@ Non-zeroes reserved bits
 
 **Produced by the reference compressor**: No
 
-**Example Frame**: `28b5 2ffd 2000 1500 0000 00`
-
 The Sequences section of each block has a header, and one of its elements is a
 byte, which describes the compression mode of each symbol.
 This byte contains 2 reserved bits which must be set to zero.
diff --git a/tests/golden-decompression-errors/off0.bin.zst b/tests/golden-decompression-errors/off0.bin.zst
new file mode 100644
index 0000000000000000000000000000000000000000..13493fb336c6e3e339c1c224a1a2ada1a8b57a0b
GIT binary patch
literal 17
YcmdPcs{faP!Igo5gMo=b-
Date: Mon, 11 Mar 2024 11:38:55 -0700
Subject: [PATCH 231/283] fix #3719

only disable `--rm` at end of command line parsing,
so that `-c` only disables `--rm` if it's effectively selected,
and not if it's overridden by a later `-o FILE` command.
---
 programs/zstd.1.md |  8 +++++---
 programs/zstdcli.c | 16 ++++++++++++----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index c5d0ef70a36..b11ad900d2b 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -225,15 +225,17 @@ the last one takes effect.
     This parameter defines a loose target: compressed blocks will target this size "on average", but individual blocks can still be larger or smaller.
     Enabling this feature can decrease compression speed by up to ~10% at level 1.
     Higher levels will see smaller relative speed regression, becoming invisible at higher settings.
-* `-o FILE`:
-    save result into `FILE`.
 * `-f`, `--force`:
     disable input and output checks. Allows overwriting existing files, input
     from console, output to stdout, operating on links, block devices, etc.
     During decompression and when the output destination is stdout, pass-through
     unrecognized formats as-is.
 * `-c`, `--stdout`:
-    write to standard output (even if it is the console); keep original files unchanged.
+    write to standard output (even if it is the console); keep original files (disable `--rm`).
+* `-o FILE`:
+    save result into `FILE`.
+    This command is in conflict with `-c`.
+    If both are present on the command line, the last expressed one wins.
 * `--[no-]sparse`:
     enable / disable sparse FS support,
     to make files with many zeroes smaller on disk.
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 1cb16ef50e7..4ea9034a103 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -1176,7 +1176,10 @@ int main(int argCount, const char* argv[])
                         operation=zom_decompress; argument++; break;
 
                     /* Force stdout, even if stdout==console */
-                case 'c': forceStdout=1; outFileName=stdoutmark; removeSrcFile=0; argument++; break;
+                case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
+
+                    /* destination file name */
+                case 'o': argument++; NEXT_FIELD(outFileName); break;
 
                     /* do not store filename - gzip compatibility - nothing to do */
                 case 'n': argument++; break;
@@ -1202,9 +1205,6 @@ int main(int argCount, const char* argv[])
                     /* test compressed file */
                 case 't': operation=zom_test; argument++; break;
 
-                    /* destination file name */
-                case 'o': argument++; NEXT_FIELD(outFileName); break;
-
                     /* limit memory */
                 case 'M':
                     argument++;
@@ -1367,6 +1367,14 @@ int main(int argCount, const char* argv[])
 #endif
     }
 
+    /* disable --rm when writing to stdout */
+    if (!strcmp(outFileName, stdoutmark)) {
+        if (removeSrcFile) {
+            DISPLAYLEVEL(2, "warning: source not removed when writing to stdout \n");
+            removeSrcFile = 0;
+        }
+    }
+
     /* Check if benchmark is selected */
     if (operation==zom_bench) {
 #ifndef ZSTD_NOBENCH

From fbd9e628ae124d4bbf4db0b8afd54b6b6e653b29 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 11 Mar 2024 12:17:34 -0700
Subject: [PATCH 232/283] added tests

---
 programs/zstdcli.c | 10 +---------
 tests/playTests.sh | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 4ea9034a103..9dd6b051a7b 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -962,7 +962,7 @@ int main(int argCount, const char* argv[])
                 if (!strcmp(argument, "--help")) { usageAdvanced(programName); CLEAN_RETURN(0); }
                 if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
                 if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
-                if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; removeSrcFile=0; continue; }
+                if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; continue; }
                 if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
                 if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; }
                 if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }
@@ -1367,14 +1367,6 @@ int main(int argCount, const char* argv[])
 #endif
     }
 
-    /* disable --rm when writing to stdout */
-    if (!strcmp(outFileName, stdoutmark)) {
-        if (removeSrcFile) {
-            DISPLAYLEVEL(2, "warning: source not removed when writing to stdout \n");
-            removeSrcFile = 0;
-        }
-    }
-
     /* Check if benchmark is selected */
     if (operation==zom_bench) {
 #ifndef ZSTD_NOBENCH
diff --git a/tests/playTests.sh b/tests/playTests.sh
index bf5fba89b35..dc7794654aa 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -234,12 +234,23 @@ unset ZSTD_CLEVEL
 println "test : compress to stdout"
 zstd tmp -c > tmpCompressed
 zstd tmp --stdout > tmpCompressed       # long command format
-println "test : compress to named file"
+
+println "test : compress to named file (-o)"
 rm -f tmpCompressed
 zstd tmp -o tmpCompressed
 test -f tmpCompressed   # file must be created
+
 println "test : force write, correct order"
 zstd tmp -fo tmpCompressed
+
+println "test : -c + -o : last one wins"
+rm -f tmpOut
+zstd tmp -c > tmpCompressed -o tmpOut
+test -f tmpOut   # file must be created
+rm -f tmpCompressed
+zstd tmp -o tmpOut -c > tmpCompressed
+test -f tmpCompressed   # file must be created
+
 println "test : forgotten argument"
 cp tmp tmp2
 zstd tmp2 -fo && die "-o must be followed by filename "
@@ -394,6 +405,8 @@ println "test: --rm is disabled when output is stdout"
 test -f tmp
 zstd --rm tmp -c > $INTOVOID
 test -f tmp # tmp shall still be there
+zstd --rm tmp --stdout > $INTOVOID
+test -f tmp # tmp shall still be there
 zstd -f --rm tmp -c > $INTOVOID
 test -f tmp # tmp shall still be there
 zstd -f tmp -c > $INTOVOID --rm
@@ -411,7 +424,22 @@ zstd -f tmp tmp2 -o tmp3.zst --rm # just warns, no prompt
 test -f tmp
 test -f tmp2
 zstd -q tmp tmp2 -o tmp3.zst --rm && die "should refuse to concatenate"
-
+println "test: --rm is active with -o when single input"
+rm -f tmp2.zst
+zstd --rm tmp2 -o tmp2.zst
+test -f tmp2.zst
+test ! -f tmp2
+println "test: -c followed by -o => -o wins, so --rm remains active" # (#3719)
+rm tmp2.zst
+cp tmp tmp2
+zstd --rm tmp2 -c > $INTOVOID -o tmp2.zst
+test ! -f tmp2
+println "test: -o followed by -c => -c wins, so --rm is disabled" # (#3719)
+rm tmp3.zst
+cp tmp tmp2
+zstd -v --rm tmp2 -o tmp2.zst -c > tmp3.zst
+test -f tmp2
+test -f tmp3.zst
 println "test : should quietly not remove non-regular file"
 println hello > tmp
 zstd tmp -f -o "$DEVDEVICE" 2>tmplog > "$INTOVOID"

From 1362699e875994689390bbee3cba87d2c11a11fb Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 11 Mar 2024 12:23:37 -0700
Subject: [PATCH 233/283] minor man page clarification

---
 programs/zstd.1.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index b11ad900d2b..646e3cf28eb 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -234,8 +234,8 @@ the last one takes effect.
     write to standard output (even if it is the console); keep original files (disable `--rm`).
 * `-o FILE`:
     save result into `FILE`.
-    This command is in conflict with `-c`.
-    If both are present on the command line, the last expressed one wins.
+    Note that this operation is in conflict with `-c`.
+    If both operations are present on the command line, the last expressed one wins.
 * `--[no-]sparse`:
     enable / disable sparse FS support,
     to make files with many zeroes smaller on disk.

From f99a450ca4d5fdb25d0d9bc5ae4c5d4787fbcb87 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" 
Date: Mon, 11 Mar 2024 15:20:06 -0400
Subject: [PATCH 234/283] Stop Hardcoding the POSIX Version on BSDs

BSDs should all have a `unistd.h` header.
---
 programs/platform.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/programs/platform.h b/programs/platform.h
index bbe0965ae76..4d2b9490e6d 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -74,8 +74,7 @@ extern "C" {
 ***************************************************************/
 #ifndef PLATFORM_POSIX_VERSION
 
-#  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
-     || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  /* BSD distros */
+#  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */
      /* exception rule : force posix version to 200112L,
       * note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
 #    define PLATFORM_POSIX_VERSION 200112L

From f6039f3d5fa607555fc193042671a05bf5029bad Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 11 Mar 2024 16:11:15 -0700
Subject: [PATCH 235/283] cmake refactor: move HP-UX specific logic into its
 own function

reduce visual clutter of corner case configuration.
---
 build/cmake/CMakeLists.txt | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt
index 50b1bb95c81..399b818fc20 100644
--- a/build/cmake/CMakeLists.txt
+++ b/build/cmake/CMakeLists.txt
@@ -114,20 +114,25 @@ endif ()
 #-----------------------------------------------------------------------------
 # External dependencies
 #-----------------------------------------------------------------------------
+# Define a function to handle special thread settings for HP-UX
+# See https://github.com/facebook/zstd/pull/3862 for details.
+function(setup_hpux_threads)
+    find_package(Threads)
+    if (NOT Threads_FOUND)
+        set(CMAKE_USE_PTHREADS_INIT 1 PARENT_SCOPE)
+        set(CMAKE_THREAD_LIBS_INIT -lpthread PARENT_SCOPE)
+        set(CMAKE_HAVE_THREADS_LIBRARY 1 PARENT_SCOPE)
+        set(Threads_FOUND TRUE PARENT_SCOPE)
+    endif()
+endfunction()
+
 if (ZSTD_MULTITHREAD_SUPPORT AND UNIX)
     if (CMAKE_SYSTEM_NAME MATCHES "HP-UX")
-        find_package(Threads)
-        if (NOT Threads_FOUND)
-            set(CMAKE_USE_PTHREADS_INIT 1)
-            set(CMAKE_THREAD_LIBS_INIT -lpthread)
-            set(CMAKE_HAVE_THREADS_LIBRARY 1)
-            set(Threads_FOUND TRUE)
-        endif ()
-    else ()
+        setup_hpux_threads()
+    else()
         set(THREADS_PREFER_PTHREAD_FLAG ON)
         find_package(Threads REQUIRED)
-    endif ()
-
+    endif()
     if (CMAKE_USE_PTHREADS_INIT)
         set(THREADS_LIBS "${CMAKE_THREAD_LIBS_INIT}")
     else()

From edab9eed66f02c7c3c8be849f22f20ffbd04976b Mon Sep 17 00:00:00 2001
From: Yonatan Komornik <11005061+yoniko@users.noreply.github.com>
Date: Mon, 11 Mar 2024 16:28:32 -0700
Subject: [PATCH 236/283] Fix AsyncIO reading seed queueing (#3940)

Fixes a bug in the AsyncIO read pool's seeding step: after opening a file, we queue reads so that the job queue starts out saturated (or as saturated as possible).
The previous code looped while `i < availableJobsCount`, not accounting for `availableJobsCount` itself decreasing on each iteration, so instead of queueing 10 jobs we'd queue 5 (and instead of 2 we'd queue 1).
This PR changes the loop to keep queueing for as long as `availableJobsCount` is not 0.
---
 programs/fileio_asyncio.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/programs/fileio_asyncio.c b/programs/fileio_asyncio.c
index 5f7bd4a4ce1..ae6db69e0a9 100644
--- a/programs/fileio_asyncio.c
+++ b/programs/fileio_asyncio.c
@@ -514,8 +514,7 @@ static void AIO_ReadPool_enqueueRead(ReadPoolCtx_t* ctx) {
 }
 
 static void AIO_ReadPool_startReading(ReadPoolCtx_t* ctx) {
-    int i;
-    for (i = 0; i < ctx->base.availableJobsCount; i++) {
+    while(ctx->base.availableJobsCount) {
         AIO_ReadPool_enqueueRead(ctx);
     }
 }

From 74e856a195005c2358b5fb4d6c90c540c5b29812 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 11 Mar 2024 17:57:57 -0700
Subject: [PATCH 237/283] add tests inspired from #2927

centered around -T# and --fast=# arguments
---
 tests/playTests.sh | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index bf5fba89b35..2c9f791f9d0 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -326,6 +326,51 @@ if [ "$isWindows" = false ] && [ "$UNAME" != "AIX" ]; then
   fi
 fi
 
+println "\n===>  multiple_thread test "
+
+datagen > tmp
+println "test : single-thread "
+zstd --fast --single-thread tmp -o tmpMT0
+println "test : one worker thread (default)"
+zstd --fast -T1 tmp -o tmpMT1
+println "test : two worker threads "
+zstd --fast -T2 tmp -o tmpMT2
+println "test : 16-thread "
+zstd --fast -T16 tmp -o tmpMT3
+println "test : 127-thread "
+zstd --fast -T127 tmp -o tmpMT4
+println "test : 128-thread "
+zstd --fast -T128 tmp -o tmpMT5
+println "test : max allowed numeric value is 4294967295 "
+zstd --fast -4294967295 tmp -o tmpMT6
+println "test : numeric value overflows 32-bit unsigned int "
+zstd --fast -4294967296 tmp -o tmptest9 && die "max allowed numeric value is 4294967295"
+
+datagen > tmp
+println "test : basic compression "
+zstd -f tmp  # trivial compression case, creates tmp.zst
+println "test : basic decompression"
+zstd -d -f -T1 tmp.zst
+println "note : decompression does not support -T mode, but execution support"
+rm -rf tmpMT*
+
+println "\n===>  --fast_argument test "
+datagen > tmp
+println "test : basic compression "
+zstd -f tmp  # trivial compression case, creates tmp.zst
+println "test: --fast=1"
+zstd --fast=1 -f tmp
+println "test: --fast=99"
+zstd --fast=99 -f tmp
+println "test: Invalid value -- negative number"
+zstd --fast=-1 -f tmp && die "error: Invalid value -- negative number"
+println "test: Invalid value -- zero"
+zstd --fast=0 -f tmp && die "error: Invalid value -- 0 number"
+println "test: max allowed numeric argument of --fast is 4294967295"
+zstd --fast=4294967295 -f tmp
+println "test: numeric value overflows 32-bit unsigned int "
+zstd --fast=4294967296 -f tmp && die "max allowed argument of --fast is 4294967295"
+
 println "\n===>  --exclude-compressed flag"
 rm -rf precompressedFilterTestDir
 mkdir -p precompressedFilterTestDir

From e0872806df5c255d23c9c9ec95fb7db50127a9e6 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky 
Date: Tue, 12 Mar 2024 10:08:26 -0400
Subject: [PATCH 238/283] Use ZSTD_LEGACY_SUPPORT=5 in make test (#3943)

---
 lib/legacy/zstd_v02.c |  4 ++--
 lib/legacy/zstd_v03.c |  4 ++--
 tests/Makefile        | 27 ++++++++++++++++++---------
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 80615e556db..6d39b6e5b2d 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -862,7 +862,7 @@ extern "C" {
 *  Streaming functions
 ***************************************/
 
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+typedef struct ZSTDv02_Dctx_s ZSTD_DCtx;
 
 /*
   Use above functions alternatively.
@@ -2737,7 +2737,7 @@ static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
 /* *************************************************************
 *   Decompression section
 ***************************************************************/
-struct ZSTD_DCtx_s
+struct ZSTDv02_Dctx_s
 {
     U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
     U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index 082fe870502..47195f33741 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -862,7 +862,7 @@ extern "C" {
 *  Streaming functions
 ***************************************/
 
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+typedef struct ZSTDv03_Dctx_s ZSTD_DCtx;
 
 /*
   Use above functions alternatively.
@@ -2377,7 +2377,7 @@ static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
 /* *************************************************************
 *   Decompression section
 ***************************************************************/
-struct ZSTD_DCtx_s
+struct ZSTDv03_Dctx_s
 {
     U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
     U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
diff --git a/tests/Makefile b/tests/Makefile
index 4eb77fdec6b..3550b7a9c62 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -20,7 +20,7 @@
 # zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode
 # ##########################################################################
 
-ZSTD_LEGACY_SUPPORT ?= 0
+ZSTD_LEGACY_SUPPORT ?= 5
 
 DEBUGLEVEL ?= 2
 export DEBUGLEVEL  # transmit value to sub-makefiles
@@ -33,28 +33,31 @@ PYTHON ?= python3
 TESTARTEFACT := versionsTest
 
 DEBUGFLAGS += -g -Wno-c++-compat
-CPPFLAGS   += -I$(LIB_SRCDIR) -I$(LIB_SRCDIR)/common -I$(LIB_SRCDIR)/compress \
+CPPFLAGS   += -I$(LIB_SRCDIR) -I$(LIB_SRCDIR)/common -I$(LIB_SRCDIR)/compress -I$(LIB_SRCDIR)/legacy \
               -I$(LIB_SRCDIR)/dictBuilder -I$(LIB_SRCDIR)/deprecated -I$(PRGDIR) \
               -DZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY=1
 
 ZSTDCOMMON_FILES := $(sort $(ZSTD_COMMON_FILES))
 ZSTDCOMP_FILES   := $(sort $(ZSTD_COMPRESS_FILES))
 ZSTDDECOMP_FILES := $(sort $(ZSTD_DECOMPRESS_FILES))
-ZSTD_FILES  := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
+ZSTDLEGACY_FILES := $(sort $(wildcard $(LIB_SRCDIR)/legacy/*.c))
+ZSTD_FILES  := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) $(ZSTDLEGACY_FILES)
 ZDICT_FILES := $(sort $(ZSTD_DICTBUILDER_FILES))
 
 ZSTD_F1 := $(sort $(wildcard $(ZSTD_FILES)))
 ZSTD_OBJ1 := $(subst $(LIB_SRCDIR)/common/,zstdm_,$(ZSTD_F1))
 ZSTD_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,zstdc_,$(ZSTD_OBJ1))
 ZSTD_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,zstdd_,$(ZSTD_OBJ2))
-ZSTD_OBJ4 := $(ZSTD_OBJ3:.c=.o)
-ZSTD_OBJECTS := $(ZSTD_OBJ4:.S=.o)
+ZSTD_OBJ4 := $(subst $(LIB_SRCDIR)/legacy/,zstdl_,$(ZSTD_OBJ3))
+ZSTD_OBJ5 := $(ZSTD_OBJ4:.c=.o)
+ZSTD_OBJECTS := $(ZSTD_OBJ5:.S=.o)
 
 ZSTDMT_OBJ1 := $(subst $(LIB_SRCDIR)/common/,zstdmt_m_,$(ZSTD_F1))
 ZSTDMT_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,zstdmt_c_,$(ZSTDMT_OBJ1))
 ZSTDMT_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,zstdmt_d_,$(ZSTDMT_OBJ2))
-ZSTDMT_OBJ4 := $(ZSTDMT_OBJ3:.c=.o)
-ZSTDMT_OBJECTS := $(ZSTDMT_OBJ4:.S=.o)
+ZSTDMT_OBJ4 := $(subst $(LIB_SRCDIR)/legacy/,zstdmt_l_,$(ZSTDMT_OBJ3))
+ZSTDMT_OBJ5 := $(ZSTDMT_OBJ4:.c=.o)
+ZSTDMT_OBJECTS := $(ZSTDMT_OBJ5:.S=.o)
 
 # Define *.exe as extension for Windows systems
 ifneq (,$(filter Windows%,$(OS)))
@@ -118,6 +121,9 @@ zstdd_%.o : $(LIB_SRCDIR)/decompress/%.c
 zstdd_%.o : $(LIB_SRCDIR)/decompress/%.S
 	$(CC) -c $(CPPFLAGS) $(ASFLAGS) $< -o $@
 
+zstdl_%.o : $(LIB_SRCDIR)/legacy/%.c
+	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
+
 zstdmt%.o : CPPFLAGS += $(MULTITHREAD_CPP)
 
 zstdmt_m_%.o : $(LIB_SRCDIR)/common/%.c
@@ -132,6 +138,9 @@ zstdmt_d_%.o : $(LIB_SRCDIR)/decompress/%.c
 zstdmt_d_%.o : $(LIB_SRCDIR)/decompress/%.S
 	$(CC) -c $(CPPFLAGS) $(ASFLAGS) $< -o $@
 
+zstdmt_l_%.o : $(LIB_SRCDIR)/legacy/%.c
+	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
+
 FULLBENCHS := fullbench fullbench32
 CLEAN += $(FULLBENCHS)
 fullbench32: CPPFLAGS += -m32
@@ -222,8 +231,8 @@ CLEAN += invalidDictionaries
 invalidDictionaries : $(ZSTD_OBJECTS) invalidDictionaries.c
 
 CLEAN += legacy
-legacy : CPPFLAGS += -I$(LIB_SRCDIR)/legacy -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=4
-legacy : $(ZSTD_FILES) $(sort $(wildcard $(LIB_SRCDIR)/legacy/*.c)) legacy.c
+legacy : CPPFLAGS += -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=4
+legacy : $(ZSTD_FILES) legacy.c
 
 CLEAN += decodecorpus
 decodecorpus : LDLIBS += -lm

From ee6acaf26bbf842837513087c91776b83d4d9560 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky 
Date: Tue, 12 Mar 2024 11:25:00 -0400
Subject: [PATCH 239/283] Pin tsan and msan CI jobs to ubuntu-20.04 (#3945)

---
 .github/workflows/dev-long-tests.yml | 34 ++++++++++++++--------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml
index 21882f024b8..eb8f40a9a44 100644
--- a/.github/workflows/dev-long-tests.yml
+++ b/.github/workflows/dev-long-tests.yml
@@ -60,14 +60,14 @@ jobs:
       run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest
 
   tsan-zstreamtest:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: thread sanitizer zstreamtest
       run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream
 
   ubsan-zstreamtest:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: undefined behavior sanitizer zstreamtest
@@ -75,7 +75,7 @@ jobs:
 
   # lasts ~15mn
   tsan-fuzztest:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: thread sanitizer fuzztest
@@ -94,7 +94,7 @@ jobs:
 
   # lasts ~23mn
   gcc-8-asan-ubsan-testzstd:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: gcc-8 + ASan + UBSan + Test Zstd
@@ -106,14 +106,14 @@ jobs:
         CC=gcc-8 make -j uasan-test-zstd 
Date: Tue, 12 Mar 2024 23:44:42 +0800
Subject: [PATCH 240/283] chore: fix some typos (#3949)

Signed-off-by: acceptacross 
---
 lib/Makefile      | 2 +-
 programs/Makefile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Makefile b/lib/Makefile
index 754c909609d..8bfdade9f12 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@
 # You may select, at your option, one of the above-listed licenses.
 # ################################################################
 
-# default target (when runing `make` with no argument)
+# default target (when running `make` with no argument)
 lib-release:
 
 # Modules
diff --git a/programs/Makefile b/programs/Makefile
index 6cd5c1eee55..4dcd84105bb 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -15,7 +15,7 @@
 # zstd-decompress : decompressor-only version of zstd
 # ##########################################################################
 
-# default target (when runing `make` with no argument)
+# default target (when running `make` with no argument)
 zstd-release:
 
 LIBZSTD_MK_DIR = ../lib

From b39c76765b761c9c3c3c23db3ed55f3f825f7e4d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 12 Mar 2024 09:28:25 -0700
Subject: [PATCH 241/283] Add the zeroSeq sample

that should have been part of #3674
---
 tests/golden-decompression-errors/.gitignore       |   1 +
 .../zeroSeq_extraneous.zst                         | Bin 0 -> 27 bytes
 2 files changed, 1 insertion(+)
 create mode 100644 tests/golden-decompression-errors/.gitignore
 create mode 100644 tests/golden-decompression-errors/zeroSeq_extraneous.zst

diff --git a/tests/golden-decompression-errors/.gitignore b/tests/golden-decompression-errors/.gitignore
new file mode 100644
index 00000000000..574b375060d
--- /dev/null
+++ b/tests/golden-decompression-errors/.gitignore
@@ -0,0 +1 @@
+!*.zst
diff --git a/tests/golden-decompression-errors/zeroSeq_extraneous.zst b/tests/golden-decompression-errors/zeroSeq_extraneous.zst
new file mode 100644
index 0000000000000000000000000000000000000000..0953be343f3cf3b334bd4281d63d9863cda309c0
GIT binary patch
literal 27
icmdPcs{faPVJZVdhDT~nPQF5Teo;<}B3A
Date: Tue, 12 Mar 2024 09:47:54 -0700
Subject: [PATCH 242/283] add same .gitignore rule in golden-decompression/

as requested by @embg
---
 tests/golden-decompression/.gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tests/golden-decompression/.gitignore

diff --git a/tests/golden-decompression/.gitignore b/tests/golden-decompression/.gitignore
new file mode 100644
index 00000000000..574b375060d
--- /dev/null
+++ b/tests/golden-decompression/.gitignore
@@ -0,0 +1 @@
+!*.zst

From 37ff4f91eba72a936771b177c83d27151d33e2f1 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 12 Mar 2024 10:47:27 -0700
Subject: [PATCH 243/283] removed golden-decompression/.gitignore

replaced by an exclusion rule in tests/.gitignore
---
 tests/.gitignore                      | 3 +++
 tests/golden-decompression/.gitignore | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)
 delete mode 100644 tests/golden-decompression/.gitignore

diff --git a/tests/.gitignore b/tests/.gitignore
index fcb865d61ec..311a8b5ebeb 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -67,3 +67,6 @@ speedTest.pid
 *.exe
 *.out
 *.app
+
+# Specific exclusions
+!golden-decompression/*.zst
diff --git a/tests/golden-decompression/.gitignore b/tests/golden-decompression/.gitignore
deleted file mode 100644
index 574b375060d..00000000000
--- a/tests/golden-decompression/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-!*.zst

From 5a66afa0514d0853b0f2a6b5ff3df1ae706f4862 Mon Sep 17 00:00:00 2001
From: daniellerozenblit <48103643+daniellerozenblit@users.noreply.github.com>
Date: Tue, 12 Mar 2024 13:49:06 -0400
Subject: [PATCH 244/283] Add common file extensions to --exclude-compressed
 (#3951)

---
 programs/fileio.c  | 104 +++++++++++++++++++++++++++++++++++++++++++++
 programs/zstd.1.md |   2 +
 tests/playTests.sh |  13 ++++++
 3 files changed, 119 insertions(+)

diff --git a/programs/fileio.c b/programs/fileio.c
index a7597d893d1..6fd55d9a801 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1907,6 +1907,110 @@ static const char *compressedFileExtensions[] = {
     TXZ_EXTENSION,
     LZ4_EXTENSION,
     TLZ4_EXTENSION,
+    ".7z",
+    ".aa3",
+    ".aac",
+    ".aar",
+    ".ace",
+    ".alac",
+    ".ape",
+    ".apk",
+    ".apng",
+    ".arc",
+    ".archive",
+    ".arj",
+    ".ark",
+    ".asf",
+    ".avi",
+    ".avif",
+    ".ba",
+    ".br",
+    ".bz2",
+    ".cab",
+    ".cdx",
+    ".chm",
+    ".cr2",
+    ".divx",
+    ".dmg",
+    ".dng",
+    ".docm",
+    ".docx",
+    ".dotm",
+    ".dotx",
+    ".dsft",
+    ".ear",
+    ".eftx",
+    ".emz",
+    ".eot",
+    ".epub",
+    ".f4v",
+    ".flac",
+    ".flv",
+    ".gho",
+    ".gif",
+    ".gifv",
+    ".gnp",
+    ".iso",
+    ".jar",
+    ".jpeg",
+    ".jpg",
+    ".jxl",
+    ".lz",
+    ".lzh",
+    ".m4a",
+    ".m4v",
+    ".mkv",
+    ".mov",
+    ".mp2",
+    ".mp3",
+    ".mp4",
+    ".mpa",
+    ".mpc",
+    ".mpe",
+    ".mpeg",
+    ".mpg",
+    ".mpl",
+    ".mpv",
+    ".msi",
+    ".odp",
+    ".ods",
+    ".odt",
+    ".ogg",
+    ".ogv",
+    ".otp",
+    ".ots",
+    ".ott",
+    ".pea",
+    ".png",
+    ".pptx",
+    ".qt",
+    ".rar",
+    ".s7z",
+    ".sfx",
+    ".sit",
+    ".sitx",
+    ".sqx",
+    ".svgz",
+    ".swf",
+    ".tbz2",
+    ".tib",
+    ".tlz",
+    ".vob",
+    ".war",
+    ".webm",
+    ".webp",
+    ".wma",
+    ".wmv",
+    ".woff",
+    ".woff2",
+    ".wvl",
+    ".xlsx",
+    ".xpi",
+    ".xps",
+    ".zip",
+    ".zipx",
+    ".zoo",
+    ".zpaq",
     NULL
 };
 
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 646e3cf28eb..a5046932ee1 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -307,6 +307,8 @@ the last one takes effect.
 * `--show-default-cparams`:
     shows the default compression parameters that will be used for a particular input file, based on the provided compression level and the input size.
     If the provided file is not a regular file (e.g. a pipe), this flag will output the parameters used for inputs of unknown size.
+* `--exclude-compressed`:
+    only compress files that are not already compressed.
 * `--`:
     All arguments after `--` are treated as files
 
diff --git a/tests/playTests.sh b/tests/playTests.sh
index a79c06c9948..e2a0694f573 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -410,6 +410,19 @@ zstd --long --rm -r precompressedFilterTestDir
 # Files should get compressed again without the --exclude-compressed flag.
 test -f precompressedFilterTestDir/input.5.zst.zst
 test -f precompressedFilterTestDir/input.6.zst.zst
+
+# Test some other compressed file extensions
+datagen $size > precompressedFilterTestDir/input.flac
+datagen $size > precompressedFilterTestDir/input.mov
+datagen $size > precompressedFilterTestDir/input.mp3
+zstd --exclude-compressed --long --rm -r precompressedFilterTestDir
+test ! -f precompressedFilterTestDir/input.flac.zst
+test ! -f precompressedFilterTestDir/input.mov.zst
+test ! -f precompressedFilterTestDir/input.mp3.zst
+zstd --long --rm -r precompressedFilterTestDir
+test -f precompressedFilterTestDir/input.flac.zst
+test -f precompressedFilterTestDir/input.mov.zst
+test -f precompressedFilterTestDir/input.mp3.zst
 rm -rf precompressedFilterTestDir
 println "Test completed"
 

From 83ec3d0164887904a7ae7f3382051ed20d5792b2 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 12 Mar 2024 11:27:42 -0700
Subject: [PATCH 245/283] no longer truncate file name in verbose mode

fix #3702
---
 programs/fileio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/programs/fileio.c b/programs/fileio.c
index 6fd55d9a801..e3012a71667 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -2441,9 +2441,10 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress,
     U64 frameSize = 0;
     IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
 
-    /* display last 20 characters only */
+    /* display last 20 characters only when not --verbose */
     {   size_t const srcFileLength = strlen(srcFileName);
-        if (srcFileLength>20) srcFileName += srcFileLength-20;
+        if ((srcFileLength>20) && (g_display_prefs.displayLevel<3))
+            srcFileName += srcFileLength-20;
     }
 
     ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);

From 92fbd42894e4dd9d58d3184923b17dda94ca6b44 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky 
Date: Tue, 12 Mar 2024 14:36:54 -0400
Subject: [PATCH 246/283] Export ZSTD_LEGACY_SUPPORT in tests/Makefile (#3955)

This doesn't affect most of the targets, but will help me sleep better at night knowing that future refactors won't break the legacy support.

Should have been included in https://github.com/facebook/zstd/pull/3943 but I noticed after that merged, so putting up a separate PR.
---
 tests/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/Makefile b/tests/Makefile
index 3550b7a9c62..ed3692a24e8 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -21,6 +21,7 @@
 # ##########################################################################
 
 ZSTD_LEGACY_SUPPORT ?= 5
+export ZSTD_LEGACY_SUPPORT
 
 DEBUGLEVEL ?= 2
 export DEBUGLEVEL  # transmit value to sub-makefiles

From 5473b72a05ad03555fed8774f7e5af5e99e27e47 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 12 Mar 2024 12:27:33 -0700
Subject: [PATCH 247/283] updated documentation

following recommendations by @zougloub at #3698
---
 programs/zstd.1    | 487 +++++++++++++++++++++++++++++++--------------
 programs/zstd.1.md | 341 ++++++++++++++++---------------
 2 files changed, 514 insertions(+), 314 deletions(-)

diff --git a/programs/zstd.1 b/programs/zstd.1
index 383d9947087..1eb9b97389b 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,381 +1,566 @@
-.TH "ZSTD" "1" "March 2023" "zstd 1.5.5" "User Commands"
+.
+.TH "ZSTD" "1" "March 2024" "zstd 1.5.5" "User Commands"
+.
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
+.
 .SH "SYNOPSIS"
-.TS
-allbox;
-\fBzstd\fR [\fIOPTIONS\fR] [\-	\fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
-.TE
+\fBzstd\fR [\fIOPTIONS\fR] [\-|\fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
+.
 .P
 \fBzstdmt\fR is equivalent to \fBzstd \-T0\fR
+.
 .P
 \fBunzstd\fR is equivalent to \fBzstd \-d\fR
+.
 .P
 \fBzstdcat\fR is equivalent to \fBzstd \-dcf\fR
+.
 .SH "DESCRIPTION"
-\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip\fR(1) and \fBxz\fR(1)\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, from fast modes at > 200 MB/s per core, to strong modes with excellent compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
+\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip\fR(1) and \fBxz\fR(1)\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, from fast modes at > 200 MB/s per core, to strong modes with excellent compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core, which remains roughly stable at all compression settings\.
+.
 .P
-\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences:
-.IP "\[ci]" 4
+\fBzstd\fR command line syntax is generally similar to gzip, but features the following few differences:
+.
+.IP "\(bu" 4
 Source files are preserved by default\. It\'s possible to remove them automatically by using the \fB\-\-rm\fR command\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 When compressing a single file, \fBzstd\fR displays progress notifications and result summary by default\. Use \fB\-q\fR to turn them off\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fBzstd\fR displays a short help page when command line is an error\. Use \fB\-q\fR to turn it off\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fBzstd\fR does not accept input from console, though it does accept \fBstdin\fR when it\'s not the console\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fBzstd\fR does not store the input\'s filename or attributes, only its contents\.
+.
 .IP "" 0
+.
 .P
 \fBzstd\fR processes each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal: it will display an error message and skip the file\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\.
+.
 .P
 Unless \fB\-\-stdout\fR or \fB\-o\fR is specified, \fIfiles\fR are written to a new file whose name is derived from the source \fIfile\fR name:
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 When compressing, the suffix \fB\.zst\fR is appended to the source filename to get the target filename\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 When decompressing, the \fB\.zst\fR suffix is removed from the source filename to get the target filename
+.
 .IP "" 0
+.
 .SS "Concatenation with \.zst Files"
 It is possible to concatenate multiple \fB\.zst\fR files\. \fBzstd\fR will decompress such agglomerated file as if it was a single \fB\.zst\fR file\.
+.
 .SH "OPTIONS"
+.
 .SS "Integer Suffixes and Special Values"
 In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers\. There must be no space between the integer and the suffix\.
+.
 .TP
 \fBKiB\fR
-Multiply the integer by 1,024 (2\e^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
+Multiply the integer by 1,024 (2^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
+.
 .TP
 \fBMiB\fR
-Multiply the integer by 1,048,576 (2\e^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
+Multiply the integer by 1,048,576 (2^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
+.
 .SS "Operation Mode"
 If multiple operation mode options are given, the last one takes effect\.
+.
 .TP
 \fB\-z\fR, \fB\-\-compress\fR
 Compress\. This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, \fBunzstd\fR implies \fB\-\-decompress\fR)\.
+.
 .TP
 \fB\-d\fR, \fB\-\-decompress\fR, \fB\-\-uncompress\fR
 Decompress\.
+.
 .TP
 \fB\-t\fR, \fB\-\-test\fR
 Test the integrity of compressed \fIfiles\fR\. This option is equivalent to \fB\-\-decompress \-\-stdout > /dev/null\fR, decompressed data is discarded and checksummed for errors\. No files are created or removed\.
+.
 .TP
 \fB\-b#\fR
 Benchmark file(s) using compression level \fI#\fR\. See \fIBENCHMARK\fR below for a description of this operation\.
+.
 .TP
 \fB\-\-train FILES\fR
 Use \fIFILES\fR as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\. See \fIDICTIONARY BUILDER\fR below for a description of this operation\.
+.
 .TP
 \fB\-l\fR, \fB\-\-list\fR
 Display information related to a zstd compressed file, such as size, ratio, and checksum\. Some of these fields may not be available\. This command\'s output can be augmented with the \fB\-v\fR modifier\.
+.
 .SS "Operation Modifiers"
-.IP "\[ci]" 4
-\fB\-#\fR: selects \fB#\fR compression level [1\-19] (default: 3)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-#\fR: selects \fB#\fR compression level [1\-19] (default: 3)\. Higher compression levels \fIgenerally\fR produce higher compression ratio at the expense of speed and memory\. A rough rule of thumb is that compression speed is expected to be divided by 2 every 2 levels\. Technically, each level is mapped to a set of advanced parameters (that can also be modified individually, see below)\. Because the compressor\'s behavior highly depends on the content to compress, there\'s no guarantee of a smooth progression from one level to another\.
+.
+.IP "\(bu" 4
 \fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-single\-thread\fR: Use a single thread for both I/O and compression\. As compression is serialized with I/O, this can be slightly slower\. Single\-thread mode features significantly lower memory usage, which can be useful for systems with limited amount of memory, such as 32\-bit systems\.
+.
 .IP
 Note 1: this mode is the only available one when multithread support is disabled\.
+.
 .IP
 Note 2: this mode is different from \fB\-T1\fR, which spawns 1 compression thread in parallel with I/O\. Final compressed result is also slightly different from \fB\-T1\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-auto\-threads={physical,logical} (default: physical)\fR: When using a default amount of threads via \fB\-T0\fR, choose the default based on the number of detected physical or logical cores\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-adapt[=min=#,max=#]\fR: \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MiB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\.
+.
 .IP
 \fINote\fR: at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR, if \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
+.
 .IP
 Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-D DICT\fR: use \fBDICT\fR as Dictionary to compress or decompress FILE(s)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that \fIwindowSize\fR > \fIsrcSize\fR\.
+.
 .IP
 Note: cannot use both this and \fB\-D\fR together\.
+.
 .IP
 Note: \fB\-\-long\fR mode will be automatically activated if \fIchainLog\fR < \fIfileLog\fR (\fIfileLog\fR being the \fIwindowLog\fR required to cover the whole file)\. You can also manually force it\.
+.
 .IP
 Note: for all levels, you can use \fB\-\-patch\-from\fR in \fB\-\-single\-thread\fR mode to improve compression ratio at the cost of speed\.
+.
 .IP
 Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (i\.e\. 4096), and by setting a large \fB\-\-zstd=chainLog=\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-rsyncable\fR: \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and a potential impact to compression speed, perceptible at higher speeds, for example when combining \fB\-\-rsyncable\fR with many parallel worker threads\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \fB\-\-content\-size\fR (meaning that the original size will be placed in the header)\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, \fBzstd\fR uses 128 MiB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\.
+.
 .IP
 This is also used during compression when using with \fB\-\-patch\-from=\fR\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MiB)\.
+.
 .IP
 Additionally, this can be used to limit memory for dictionary training\. This parameter overrides the default limit of 2 GiB\. zstd will load training samples up to the memory limit and ignore the rest\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-stream\-size=#\fR: Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
-.IP "\[ci]" 4
-\fB\-o FILE\fR: save result into \fBFILE\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-\-target\-compressed\-block\-size=#\fR: Attempt to produce compressed blocks of approximately this size\. This will split larger blocks in order to approach this target\. This feature is notably useful for improved latency, when the receiver can leverage receiving early incomplete data\. This parameter defines a loose target: compressed blocks will target this size "on average", but individual blocks can still be larger or smaller\. Enabling this feature can decrease compression speed by up to ~10% at level 1\. Higher levels will see smaller relative speed regression, becoming invisible at higher settings\.
+.
+.IP "\(bu" 4
 \fB\-f\fR, \fB\-\-force\fR: disable input and output checks\. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc\. During decompression and when the output destination is stdout, pass\-through unrecognized formats as\-is\.
-.IP "\[ci]" 4
-\fB\-c\fR, \fB\-\-stdout\fR: write to standard output (even if it is the console); keep original files unchanged\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-c\fR, \fB\-\-stdout\fR: write to standard output (even if it is the console); keep original files (disable \fB\-\-rm\fR)\.
+.
+.IP "\(bu" 4
+\fB\-o FILE\fR: save result into \fBFILE\fR\. Note that this operation is in conflict with \fB\-c\fR\. If both operations are present on the command line, the last expressed one wins\.
+.
+.IP "\(bu" 4
 \fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-[no\-]pass\-through\fR enable / disable passing through uncompressed files as\-is\. During decompression when pass\-through is enabled, unrecognized formats will be copied as\-is from the input to the output\. By default, pass\-through will occur when the output destination is stdout and the force (\fB\-f\fR) option is set\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. This command is silently ignored if output is \fBstdout\fR\. If used in combination with \fB\-o\fR, triggers a confirmation prompt (which can be silenced with \fB\-f\fR), as this is a destructive operation\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-r\fR: operate recursively on directories\. It selects all files in the named directory and all its subdirectories\. This can be useful both to reduce command line typing, and to circumvent shell expansion limitations, when there are a lot of files and naming breaks the maximum size of a command line\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-filelist FILE\fR read a list of files to process as content from \fBFILE\fR\. Format is compatible with \fBls\fR output, with one file per line\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-output\-dir\-flat DIR\fR: resulting files are stored into target \fBDIR\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBDIR\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-output\-dir\-mirror DIR\fR: similar to \fB\-\-output\-dir\-flat\fR, the output files are stored underneath target \fBDIR\fR directory, but this option will replicate input directory hierarchy into output \fBDIR\fR\.
+.
 .IP
 If input directory contains "\.\.", the files in this directory will be ignored\. If input directory is an absolute directory (i\.e\. "/var/tmp/abc"), it will be stored into the "output\-dir/var/tmp/abc"\. If there are multiple input files or directories, name collision resolution will follow the same rules as \fB\-\-output\-dir\-flat\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-format=FORMAT\fR: compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit
-.IP "\[ci]" 4
-\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced: \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-V\fR, \fB\-\-version\fR: display version number and immediately exit\. Note that, since it exits, flags specified after \fB\-V\fR are effectively ignored\. Advanced: \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-qV\fR will only display the version number, suitable for machine reading\.
+.
+.IP "\(bu" 4
 \fB\-v\fR, \fB\-\-verbose\fR: verbose mode, display more information
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-q\fR, \fB\-\-quiet\fR: suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-show\-default\-cparams\fR: shows the default compression parameters that will be used for a particular input file, based on the provided compression level and the input size\. If the provided file is not a regular file (e\.g\. a pipe), this flag will output the parameters used for inputs of unknown size\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-\-exclude\-compressed\fR: only compress files that are not already compressed\.
+.
+.IP "\(bu" 4
 \fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files
+.
 .IP "" 0
+.
 .SS "gzip Operation Modifiers"
 When invoked via a \fBgzip\fR symlink, \fBzstd\fR will support further options that intend to mimic the \fBgzip\fR behavior:
+.
 .TP
 \fB\-n\fR, \fB\-\-no\-name\fR
 do not store the original filename and timestamps when compressing a file\. This is the default behavior and hence a no\-op\.
+.
 .TP
 \fB\-\-best\fR
 alias to the option \fB\-9\fR\.
+.
 .SS "Environment Variables"
-Employing environment variables to set parameters has security implications\. Therefore, this avenue is intentionally limited\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the compression level and number of threads to use during compression, respectively\.
+Employing environment variables to set parameters has security implications\. Therefore, this avenue is intentionally limited\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the default compression level and number of threads to use during compression, respectively\.
+.
 .P
 \fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\.
+.
 .P
-\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \fBZSTD_NBTHREADS\fR has a default value of (\fB1\fR), and is capped at ZSTDMT_NBWORKERS_MAX==200\. \fBzstd\fR must be compiled with multithread support for this to have any effect\.
+\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \fBZSTD_NBTHREADS\fR has a default value of (\fB1\fR), and is capped at \fBZSTDMT_NBWORKERS_MAX\fR (64 in 32\-bit mode, 256 in 64\-bit environments)\. \fBzstd\fR must be compiled with multithread support for this variable to have any effect\.
+.
 .P
 They can both be overridden by corresponding command line arguments: \fB\-#\fR for compression level and \fB\-T#\fR for number of compression threads\.
-.SH "DICTIONARY BUILDER"
-\fBzstd\fR offers \fIdictionary\fR compression, which greatly improves efficiency on small files and messages\. It\'s possible to train \fBzstd\fR with a set of samples, the result of which is saved into a file called a \fBdictionary\fR\. Then, during compression and decompression, reference the same dictionary, using command \fB\-D dictionaryFileName\fR\. Compression of small files similar to the sample set will be greatly improved\.
-.TP
-\fB\-\-train FILEs\fR
-Use FILEs as training set to create a dictionary\. The training set should ideally contain a lot of samples (> 100), and weight typically 100x the target dictionary size (for example, ~10 MB for a 100 KB dictionary)\. \fB\-\-train\fR can be combined with \fB\-r\fR to indicate a directory rather than listing all the files, which can be useful to circumvent shell expansion limits\.
-.IP
-Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files\. In the case where some samples happen to be large, only the first 128 KiB of these samples will be used for training\.
-.IP
-\fB\-\-train\fR supports multithreading if \fBzstd\fR is compiled with threading support (default)\. Additional advanced parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The slower cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Default \fB\-\-train\fR is equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
-.TP
-\fB\-o FILE\fR
-Dictionary saved into \fBFILE\fR (default name: dictionary)\.
-.TP
-\fB\-\-maxdict=#\fR
-Limit dictionary to specified size (default: 112640 bytes)\. As usual, quantities are expressed in bytes by default, and it\'s possible to employ suffixes (like \fBKB\fR or \fBMB\fR) to specify larger values\.
-.TP
-\fB\-#\fR
-Use \fB#\fR compression level during training (optional)\. Will generate statistics more tuned for selected compression level, resulting in a \fIsmall\fR compression ratio improvement for this level\.
-.TP
-\fB\-B#\fR
-Split input files into blocks of size # (default: no split)
-.TP
-\fB\-M#\fR, \fB\-\-memory=#\fR
-Limit the amount of sample data loaded for training (default: 2 GB)\. Note that the default (2 GB) is also the maximum\. This parameter can be useful in situations where the training set size is not well controlled and could be potentially very large\. Since speed of the training process is directly correlated to the size of the training sample set, a smaller sample set leads to faster training\.
-.IP
-In situations where the training set is larger than maximum memory, the CLI will randomly select samples among the available ones, up to the maximum allowed memory budget\. This is meant to improve dictionary relevance by mitigating the potential impact of clustering, such as selecting only files from the beginning of a list sorted by modification date, or sorted by alphabetical order\. The randomization process is deterministic, so training of the same list of files with the same parameters will lead to the creation of the same dictionary\.
-.TP
-\fB\-\-dictID=#\fR
-A dictionary ID is a locally unique ID\. The decoder will use this value to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to provide an explicit number ID instead\. It\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. Note that short numbers have an advantage: an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\.
-.IP
-Note that RFC8878 reserves IDs less than 32768 and greater than or equal to 2\e^31, so they should not be used in public\.
-.TP
-\fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR
-Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\. If \fIshrink\fR flag is not used, then the default value for \fIshrinkDict\fR of 0 is used\. If \fIshrink\fR is not specified, then the default value for \fIshrinkDictMaxRegression\fR of 1 is used\.
-.IP
-Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\. Having \fIshrink\fR enabled takes a truncated dictionary of minimum size and doubles in size until compression ratio of the truncated dictionary is at most \fIshrinkDictMaxRegression%\fR worse than the compression ratio of the largest dictionary\.
-.IP
-Examples:
-.IP
-\fBzstd \-\-train\-cover FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=k=50 FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=shrink FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=shrink=2 FILEs\fR
-.TP
-\fB\-\-train\-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]\fR
-Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.
-.IP
-\fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR\. The subsegment is hashed to an index in the range [0,2^\fIf\fR \- 1]\. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency\. Using a higher \fIf\fR reduces collision but takes longer\.
-.IP
-Examples:
-.IP
-\fBzstd \-\-train\-fastcover FILEs\fR
-.IP
-\fBzstd \-\-train\-fastcover=d=8,f=15,accel=2 FILEs\fR
-.TP
-\fB\-\-train\-legacy[=selectivity=#]\fR
-Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its achievable maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
-.IP
-Examples:
-.IP
-\fBzstd \-\-train\-legacy FILEs\fR
-.IP
-\fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR
-.SH "BENCHMARK"
-.TP
-\fB\-b#\fR
-benchmark file(s) using compression level #
-.TP
-\fB\-e#\fR
-benchmark file(s) using multiple compression levels, from \fB\-b#\fR to \fB\-e#\fR (inclusive)
-.TP
-\fB\-i#\fR
-minimum evaluation time, in seconds (default: 3s), benchmark mode only
-.TP
-\fB\-B#\fR, \fB\-\-block\-size=#\fR
-cut file(s) into independent chunks of size # (default: no chunking)
-.TP
-\fB\-\-priority=rt\fR
-set process priority to real\-time
-.P
-\fBOutput Format:\fR CompressionLevel#Filename: InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
-.P
-\fBMethodology:\fR For both compression and decompression speed, the entire input is compressed/decompressed in\-memory to measure speed\. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy\.
+.
 .SH "ADVANCED COMPRESSION OPTIONS"
-### \-B#: Specify the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. Different job sizes will lead to non\-identical compressed frames\.
+zstd provides 22 predefined regular compression levels plus the fast levels\. A compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor (one can see the result of this translation with \-\-show\-default\-cparams)\. These specific parameters can be overridden with advanced compression options\.
+.
 .SS "\-\-zstd[=options]:"
 \fBzstd\fR provides 22 predefined regular compression levels plus the fast levels\. This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor\. (You can see the result of this translation with \fB\-\-show\-default\-cparams\fR\.) These specific parameters can be overridden with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
+.
 .TP
 \fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR
 Specify a strategy used by a match finder\.
+.
 .IP
 There are 9 strategies numbered from 1 to 9, from fastest to strongest: 1=\fBZSTD_fast\fR, 2=\fBZSTD_dfast\fR, 3=\fBZSTD_greedy\fR, 4=\fBZSTD_lazy\fR, 5=\fBZSTD_lazy2\fR, 6=\fBZSTD_btlazy2\fR, 7=\fBZSTD_btopt\fR, 8=\fBZSTD_btultra\fR, 9=\fBZSTD_btultra2\fR\.
+.
 .TP
 \fBwindowLog\fR=\fIwlog\fR, \fBwlog\fR=\fIwlog\fR
 Specify the maximum number of bits for a match distance\.
+.
 .IP
 The higher number of increases the chance to find a match which usually improves compression ratio\. It also increases memory requirements for the compressor and decompressor\. The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32\-bit platforms and 31 (2 GiB) on 64\-bit platforms\.
+.
 .IP
 Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
+.
 .TP
 \fBhashLog\fR=\fIhlog\fR, \fBhlog\fR=\fIhlog\fR
 Specify the maximum number of bits for a hash table\.
+.
 .IP
 Bigger hash tables cause fewer collisions which usually makes compression faster, but requires more memory during compression\.
+.
 .IP
 The minimum \fIhlog\fR is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB)\.
+.
 .TP
 \fBchainLog\fR=\fIclog\fR, \fBclog\fR=\fIclog\fR
 Specify the maximum number of bits for the secondary search structure, whose form depends on the selected \fBstrategy\fR\.
+.
 .IP
 Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the \fBZSTD_fast\fR \fBstrategy\fR, which only has the primary hash table\.
+.
 .IP
 The minimum \fIclog\fR is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32\-bit platforms and 30 (1B entries / 4 GiB) on 64\-bit platforms\.
+.
 .TP
 \fBsearchLog\fR=\fIslog\fR, \fBslog\fR=\fIslog\fR
 Specify the maximum number of searches in a hash chain or a binary tree using logarithmic scale\.
+.
 .IP
 More searches increases the chance to find a match which usually increases compression ratio but decreases compression speed\.
+.
 .IP
 The minimum \fIslog\fR is 1 and the maximum is \'windowLog\' \- 1\.
+.
 .TP
 \fBminMatch\fR=\fImml\fR, \fBmml\fR=\fImml\fR
 Specify the minimum searched length of a match in a hash table\.
+.
 .IP
 Larger search lengths usually decrease compression ratio but improve decompression speed\.
+.
 .IP
 The minimum \fImml\fR is 3 and the maximum is 7\.
+.
 .TP
 \fBtargetLength\fR=\fItlen\fR, \fBtlen\fR=\fItlen\fR
 The impact of this field vary depending on selected strategy\.
+.
 .IP
 For \fBZSTD_btopt\fR, \fBZSTD_btultra\fR and \fBZSTD_btultra2\fR, it specifies the minimum match length that causes match finder to stop searching\. A larger \fBtargetLength\fR usually improves compression ratio but decreases compression speed\.
+.
 .IP
 For \fBZSTD_fast\fR, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed: a larger \fBtargetLength\fR increases compression speed but decreases compression ratio\.
+.
 .IP
 For all other strategies, this field has no impact\.
+.
 .IP
 The minimum \fItlen\fR is 0 and the maximum is 128 KiB\.
+.
 .TP
 \fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR
 Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\.
+.
 .IP
 The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default": \fIovlog\fR is automatically determined by \fBzstd\fR\. In which case, \fIovlog\fR will range from 6 to 9, depending on selected \fIstrat\fR\.
+.
 .TP
 \fBldmHashLog\fR=\fIlhlog\fR, \fBlhlog\fR=\fIlhlog\fR
 Specify the maximum size for a hash table used for long distance matching\.
+.
 .IP
 This option is ignored unless long distance matching is enabled\.
+.
 .IP
 Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed\.
+.
 .IP
 The minimum \fIlhlog\fR is 6 and the maximum is 30 (default: 20)\.
+.
 .TP
 \fBldmMinMatch\fR=\fIlmml\fR, \fBlmml\fR=\fIlmml\fR
 Specify the minimum searched length of a match for long distance matching\.
+.
 .IP
 This option is ignored unless long distance matching is enabled\.
+.
 .IP
 Larger/very small values usually decrease compression ratio\.
+.
 .IP
 The minimum \fIlmml\fR is 4 and the maximum is 4096 (default: 64)\.
+.
 .TP
 \fBldmBucketSizeLog\fR=\fIlblog\fR, \fBlblog\fR=\fIlblog\fR
 Specify the size of each bucket for the hash table used for long distance matching\.
+.
 .IP
 This option is ignored unless long distance matching is enabled\.
+.
 .IP
 Larger bucket sizes improve collision resolution but decrease compression speed\.
+.
 .IP
 The minimum \fIlblog\fR is 1 and the maximum is 8 (default: 3)\.
+.
 .TP
 \fBldmHashRateLog\fR=\fIlhrlog\fR, \fBlhrlog\fR=\fIlhrlog\fR
 Specify the frequency of inserting entries into the long distance matching hash table\.
+.
 .IP
 This option is ignored unless long distance matching is enabled\.
+.
 .IP
 Larger values will improve compression speed\. Deviating far from the default value will likely result in a decrease in compression ratio\.
+.
 .IP
 The default value is \fBwlog \- lhlog\fR\.
+.
 .SS "Example"
 The following parameters sets advanced compression options to something similar to predefined level 19 for files bigger than 256 KB:
+.
 .P
 \fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
+.
+.SS "\-B#:"
+Specify the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. Different job sizes will lead to non\-identical compressed frames\.
+.
+.SH "DICTIONARY BUILDER"
+\fBzstd\fR offers \fIdictionary\fR compression, which greatly improves efficiency on small files and messages\. It\'s possible to train \fBzstd\fR with a set of samples, the result of which is saved into a file called a \fBdictionary\fR\. Then, during compression and decompression, reference the same dictionary, using command \fB\-D dictionaryFileName\fR\. Compression of small files similar to the sample set will be greatly improved\.
+.
+.TP
+\fB\-\-train FILEs\fR
+Use FILEs as training set to create a dictionary\. The training set should ideally contain a lot of samples (> 100), and weight typically 100x the target dictionary size (for example, ~10 MB for a 100 KB dictionary)\. \fB\-\-train\fR can be combined with \fB\-r\fR to indicate a directory rather than listing all the files, which can be useful to circumvent shell expansion limits\.
+.
+.IP
+Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files\. In the case where some samples happen to be large, only the first 128 KiB of these samples will be used for training\.
+.
+.IP
+\fB\-\-train\fR supports multithreading if \fBzstd\fR is compiled with threading support (default)\. Additional advanced parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The slower cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Default \fB\-\-train\fR is equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
+.
+.TP
+\fB\-o FILE\fR
+Dictionary saved into \fBFILE\fR (default name: dictionary)\.
+.
+.TP
+\fB\-\-maxdict=#\fR
+Limit dictionary to specified size (default: 112640 bytes)\. As usual, quantities are expressed in bytes by default, and it\'s possible to employ suffixes (like \fBKB\fR or \fBMB\fR) to specify larger values\.
+.
+.TP
+\fB\-#\fR
+Use \fB#\fR compression level during training (optional)\. Will generate statistics more tuned for selected compression level, resulting in a \fIsmall\fR compression ratio improvement for this level\.
+.
+.TP
+\fB\-B#\fR
+Split input files into blocks of size # (default: no split)
+.
+.TP
+\fB\-M#\fR, \fB\-\-memory=#\fR
+Limit the amount of sample data loaded for training (default: 2 GB)\. Note that the default (2 GB) is also the maximum\. This parameter can be useful in situations where the training set size is not well controlled and could be potentially very large\. Since speed of the training process is directly correlated to the size of the training sample set, a smaller sample set leads to faster training\.
+.
+.IP
+In situations where the training set is larger than maximum memory, the CLI will randomly select samples among the available ones, up to the maximum allowed memory budget\. This is meant to improve dictionary relevance by mitigating the potential impact of clustering, such as selecting only files from the beginning of a list sorted by modification date, or sorted by alphabetical order\. The randomization process is deterministic, so training of the same list of files with the same parameters will lead to the creation of the same dictionary\.
+.
+.TP
+\fB\-\-dictID=#\fR
+A dictionary ID is a locally unique ID\. The decoder will use this value to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to provide an explicit number ID instead\. It\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. Note that short numbers have an advantage: an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\.
+.
+.IP
+Note that RFC8878 reserves IDs less than 32768 and greater than or equal to 2^31, so they should not be used in public\.
+.
+.TP
+\fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR
+Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\. If \fIshrink\fR flag is not used, then the default value for \fIshrinkDict\fR of 0 is used\. If \fIshrink\fR is not specified, then the default value for \fIshrinkDictMaxRegression\fR of 1 is used\.
+.
+.IP
+Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\. Having \fIshrink\fR enabled takes a truncated dictionary of minimum size and doubles in size until compression ratio of the truncated dictionary is at most \fIshrinkDictMaxRegression%\fR worse than the compression ratio of the largest dictionary\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-cover FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=shrink FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=shrink=2 FILEs\fR
+.
+.TP
+\fB\-\-train\-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]\fR
+Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split\. If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.
+.
+.IP
+\fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR\. The subsegment is hashed to an index in the range [0,2^\fIf\fR \- 1]\. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency\. Using a higher \fIf\fR reduces collision but takes longer\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-fastcover FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-fastcover=d=8,f=15,accel=2 FILEs\fR
+.
+.TP
+\fB\-\-train\-legacy[=selectivity=#]\fR
+Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its achievable maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-legacy FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR
+.
+.SH "BENCHMARK"
+The \fBzstd\fR CLI provides a benchmarking mode that can be used to easily find suitable compression parameters, or alternatively to benchmark a computer\'s performance\. Note that the results are highly dependent on the content being compressed\.
+.
+.TP
+\fB\-b#\fR
+benchmark file(s) using compression level #
+.
+.TP
+\fB\-e#\fR
+benchmark file(s) using multiple compression levels, from \fB\-b#\fR to \fB\-e#\fR (inclusive)
+.
+.TP
+\fB\-d\fR
+benchmark decompression speed only (requires providing an already zstd\-compressed content)
+.
+.TP
+\fB\-i#\fR
+minimum evaluation time, in seconds (default: 3s), benchmark mode only
+.
+.TP
+\fB\-B#\fR, \fB\-\-block\-size=#\fR
+cut file(s) into independent chunks of size # (default: no chunking)
+.
+.TP
+\fB\-\-priority=rt\fR
+set process priority to real\-time (Windows)
+.
+.P
+\fBOutput Format:\fR CompressionLevel#Filename: InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
+.
+.P
+\fBMethodology:\fR For both compression and decompression speed, the entire input is compressed/decompressed in\-memory to measure speed\. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy\.
+.
 .SH "SEE ALSO"
 \fBzstdgrep\fR(1), \fBzstdless\fR(1), \fBgzip\fR(1), \fBxz\fR(1)
+.
 .P
 The \fIzstandard\fR format is specified in Y\. Collet, "Zstandard Compression and the \'application/zstd\' Media Type", https://www\.ietf\.org/rfc/rfc8878\.txt, Internet RFC 8878 (February 2021)\.
+.
 .SH "BUGS"
 Report bugs at: https://github\.com/facebook/zstd/issues
+.
 .SH "AUTHOR"
 Yann Collet
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index a5046932ee1..f5abb141ad3 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -21,10 +21,11 @@ It is based on the **LZ77** family, with further FSE & huff0 entropy stages.
 `zstd` offers highly configurable compression speed,
 from fast modes at > 200 MB/s per core,
 to strong modes with excellent compression ratios.
-It also features a very fast decoder, with speeds > 500 MB/s per core.
+It also features a very fast decoder, with speeds > 500 MB/s per core,
+which remains roughly stable at all compression settings.
 
 `zstd` command line syntax is generally similar to gzip,
-but features the following differences:
+but features the following few differences:
 
   - Source files are preserved by default.
     It's possible to remove them automatically by using the `--rm` command.
@@ -105,7 +106,11 @@ the last one takes effect.
 ### Operation Modifiers
 
 * `-#`:
-    selects `#` compression level \[1-19\] (default: 3)
+    selects `#` compression level \[1-19\] (default: 3).
+    Higher compression levels *generally* produce higher compression ratio at the expense of speed and memory.
+    A rough rule of thumb is that compression speed is expected to be divided by 2 every 2 levels.
+    Technically, each level is mapped to a set of advanced parameters (that can also be modified individually, see below).
+    Because the compressor's behavior highly depends on the content to compress, there's no guarantee of a smooth progression from one level to another.
 * `--ultra`:
     unlocks high compression levels 20+ (maximum 22), using a lot more memory.
     Note that decompression will also require more memory when using these levels.
@@ -325,11 +330,10 @@ options that intend to mimic the `gzip` behavior:
 
 
 ### Environment Variables
-
 Employing environment variables to set parameters has security implications.
 Therefore, this avenue is intentionally limited.
 Only `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` are currently supported.
-They set the compression level and number of threads to use during compression, respectively.
+They set the default compression level and number of threads to use during compression, respectively.
 
 `ZSTD_CLEVEL` can be used to set the level between 1 and 19 (the "normal" range).
 If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a warning message.
@@ -338,12 +342,175 @@ If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a
 `ZSTD_NBTHREADS` can be used to set the number of threads `zstd` will attempt to use during compression.
 If the value of `ZSTD_NBTHREADS` is not a valid unsigned integer, it will be ignored with a warning message.
 `ZSTD_NBTHREADS` has a default value of (`1`), and is capped at ZSTDMT_NBWORKERS_MAX==200.
-`zstd` must be compiled with multithread support for this to have any effect.
+`zstd` must be compiled with multithread support for this variable to have any effect.
 
 They can both be overridden by corresponding command line arguments:
 `-#` for compression level and `-T#` for number of compression threads.
 
 
+ADVANCED COMPRESSION OPTIONS
+----------------------------
+zstd provides 22 predefined regular compression levels plus the fast levels.
+A compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor
+(one can see the result of this translation with --show-default-cparams).
+These specific parameters can be overridden with advanced compression options.
+
+### --zstd[=options]:
+`zstd` provides 22 predefined regular compression levels plus the fast levels.
+This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor.
+(You can see the result of this translation with `--show-default-cparams`.)
+These specific parameters can be overridden with advanced compression options.
+The _options_ are provided as a comma-separated list.
+You may specify only the options you want to change and the rest will be
+taken from the selected or default compression level.
+The list of available _options_:
+
+- `strategy`=_strat_, `strat`=_strat_:
+    Specify a strategy used by a match finder.
+
+    There are 9 strategies numbered from 1 to 9, from fastest to strongest:
+    1=`ZSTD_fast`, 2=`ZSTD_dfast`, 3=`ZSTD_greedy`,
+    4=`ZSTD_lazy`, 5=`ZSTD_lazy2`, 6=`ZSTD_btlazy2`,
+    7=`ZSTD_btopt`, 8=`ZSTD_btultra`, 9=`ZSTD_btultra2`.
+
+- `windowLog`=_wlog_, `wlog`=_wlog_:
+    Specify the maximum number of bits for a match distance.
+
+    A higher number of bits increases the chance to find a match which usually
+    improves compression ratio.
+    It also increases memory requirements for the compressor and decompressor.
+    The minimum _wlog_ is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32-bit
+    platforms and 31 (2 GiB) on 64-bit platforms.
+
+    Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
+    `--memory=windowSize` needs to be passed to the decompressor.
+
+- `hashLog`=_hlog_, `hlog`=_hlog_:
+    Specify the maximum number of bits for a hash table.
+
+    Bigger hash tables cause fewer collisions which usually makes compression
+    faster, but requires more memory during compression.
+
+    The minimum _hlog_ is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB).
+
+- `chainLog`=_clog_, `clog`=_clog_:
+    Specify the maximum number of bits for the secondary search structure,
+    whose form depends on the selected `strategy`.
+
+    A higher number of bits increases the chance to find a match which usually
+    improves compression ratio.
+    It also slows down compression speed and increases memory requirements for
+    compression.
+    This option is ignored for the `ZSTD_fast` `strategy`, which only has the primary hash table.
+
+    The minimum _clog_ is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32-bit platforms
+    and 30 (1B entries / 4 GiB) on 64-bit platforms.
+
+- `searchLog`=_slog_, `slog`=_slog_:
+    Specify the maximum number of searches in a hash chain or a binary tree
+    using logarithmic scale.
+
+    More searches increase the chance to find a match which usually increases
+    compression ratio but decreases compression speed.
+
+    The minimum _slog_ is 1 and the maximum is 'windowLog' - 1.
+
+- `minMatch`=_mml_, `mml`=_mml_:
+    Specify the minimum searched length of a match in a hash table.
+
+    Larger search lengths usually decrease compression ratio but improve
+    decompression speed.
+
+    The minimum _mml_ is 3 and the maximum is 7.
+
+- `targetLength`=_tlen_, `tlen`=_tlen_:
+    The impact of this field varies depending on the selected strategy.
+
+    For `ZSTD_btopt`, `ZSTD_btultra` and `ZSTD_btultra2`, it specifies
+    the minimum match length that causes match finder to stop searching.
+    A larger `targetLength` usually improves compression ratio
+    but decreases compression speed.
+
+    For `ZSTD_fast`, it triggers ultra-fast mode when > 0.
+    The value represents the amount of data skipped between match sampling.
+    Impact is reversed: a larger `targetLength` increases compression speed
+    but decreases compression ratio.
+
+    For all other strategies, this field has no impact.
+
+    The minimum _tlen_ is 0 and the maximum is 128 KiB.
+
+- `overlapLog`=_ovlog_,  `ovlog`=_ovlog_:
+    Determine `overlapSize`, amount of data reloaded from previous job.
+    This parameter is only available when multithreading is enabled.
+    Reloading more data improves compression ratio, but decreases speed.
+
+    The minimum _ovlog_ is 0, and the maximum is 9.
+    1 means "no overlap", hence completely independent jobs.
+    9 means "full overlap", meaning up to `windowSize` is reloaded from previous job.
+    Reducing _ovlog_ by 1 reduces the reloaded amount by a factor 2.
+    For example, 8 means "windowSize/2", and 6 means "windowSize/8".
+    Value 0 is special and means "default": _ovlog_ is automatically determined by `zstd`.
+    In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_.
+
+- `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_:
+    Specify the maximum size for a hash table used for long distance matching.
+
+    This option is ignored unless long distance matching is enabled.
+
+    Bigger hash tables usually improve compression ratio at the expense of more
+    memory during compression and a decrease in compression speed.
+
+    The minimum _lhlog_ is 6 and the maximum is 30 (default: 20).
+
+- `ldmMinMatch`=_lmml_, `lmml`=_lmml_:
+    Specify the minimum searched length of a match for long distance matching.
+
+    This option is ignored unless long distance matching is enabled.
+
+    Larger/very small values usually decrease compression ratio.
+
+    The minimum _lmml_ is 4 and the maximum is 4096 (default: 64).
+
+- `ldmBucketSizeLog`=_lblog_, `lblog`=_lblog_:
+    Specify the size of each bucket for the hash table used for long distance
+    matching.
+
+    This option is ignored unless long distance matching is enabled.
+
+    Larger bucket sizes improve collision resolution but decrease compression
+    speed.
+
+    The minimum _lblog_ is 1 and the maximum is 8 (default: 3).
+
+- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_:
+    Specify the frequency of inserting entries into the long distance matching
+    hash table.
+
+    This option is ignored unless long distance matching is enabled.
+
+    Larger values will improve compression speed. Deviating far from the
+    default value will likely result in a decrease in compression ratio.
+
+    The default value is `wlog - lhlog`.
+
+### Example
+The following parameters set advanced compression options to something
+similar to predefined level 19 for files bigger than 256 KB:
+
+`--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
+
+### -B#:
+Specify the size of each compression job.
+This parameter is only available when multi-threading is enabled.
+Each compression job is run in parallel, so this value indirectly impacts the number of active threads.
+Default job size varies depending on compression level (generally  `4 * windowSize`).
+`-B#` makes it possible to manually select a custom size.
+Note that job size must respect a minimum value which is enforced transparently.
+This minimum is either 512 KB, or `overlapSize`, whichever is larger.
+Different job sizes will lead to non-identical compressed frames.
+
+
 DICTIONARY BUILDER
 ------------------
 `zstd` offers _dictionary_ compression,
@@ -496,178 +663,26 @@ Compression of small files similar to the sample set will be greatly improved.
 
 BENCHMARK
 ---------
+The `zstd` CLI provides a benchmarking mode that can be used to easily find suitable compression parameters, or alternatively to benchmark a computer's performance.
+Note that the results are highly dependent on the content being compressed.
 
 * `-b#`:
     benchmark file(s) using compression level #
 * `-e#`:
     benchmark file(s) using multiple compression levels, from `-b#` to `-e#` (inclusive)
+* `-d`:
+    benchmark decompression speed only (requires providing an already zstd-compressed content)
 * `-i#`:
     minimum evaluation time, in seconds (default: 3s), benchmark mode only
 * `-B#`, `--block-size=#`:
     cut file(s) into independent chunks of size # (default: no chunking)
 * `--priority=rt`:
-    set process priority to real-time
+    set process priority to real-time (Windows)
 
 **Output Format:** CompressionLevel#Filename: InputSize -> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
 
 **Methodology:** For both compression and decompression speed, the entire input is compressed/decompressed in-memory to measure speed. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy.
 
-ADVANCED COMPRESSION OPTIONS
-----------------------------
-### -B#:
-Specify the size of each compression job.
-This parameter is only available when multi-threading is enabled.
-Each compression job is run in parallel, so this value indirectly impacts the nb of active threads.
-Default job size varies depending on compression level (generally  `4 * windowSize`).
-`-B#` makes it possible to manually select a custom size.
-Note that job size must respect a minimum value which is enforced transparently.
-This minimum is either 512 KB, or `overlapSize`, whichever is largest.
-Different job sizes will lead to non-identical compressed frames.
-
-### --zstd[=options]:
-`zstd` provides 22 predefined regular compression levels plus the fast levels.
-This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor.
-(You can see the result of this translation with `--show-default-cparams`.)
-These specific parameters can be overridden with advanced compression options.
-The _options_ are provided as a comma-separated list.
-You may specify only the options you want to change and the rest will be
-taken from the selected or default compression level.
-The list of available _options_:
-
-- `strategy`=_strat_, `strat`=_strat_:
-    Specify a strategy used by a match finder.
-
-    There are 9 strategies numbered from 1 to 9, from fastest to strongest:
-    1=`ZSTD_fast`, 2=`ZSTD_dfast`, 3=`ZSTD_greedy`,
-    4=`ZSTD_lazy`, 5=`ZSTD_lazy2`, 6=`ZSTD_btlazy2`,
-    7=`ZSTD_btopt`, 8=`ZSTD_btultra`, 9=`ZSTD_btultra2`.
-
-- `windowLog`=_wlog_, `wlog`=_wlog_:
-    Specify the maximum number of bits for a match distance.
-
-    The higher number of increases the chance to find a match which usually
-    improves compression ratio.
-    It also increases memory requirements for the compressor and decompressor.
-    The minimum _wlog_ is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32-bit
-    platforms and 31 (2 GiB) on 64-bit platforms.
-
-    Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
-    `--memory=windowSize` needs to be passed to the decompressor.
-
-- `hashLog`=_hlog_, `hlog`=_hlog_:
-    Specify the maximum number of bits for a hash table.
-
-    Bigger hash tables cause fewer collisions which usually makes compression
-    faster, but requires more memory during compression.
-
-    The minimum _hlog_ is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB).
-
-- `chainLog`=_clog_, `clog`=_clog_:
-    Specify the maximum number of bits for the secondary search structure,
-    whose form depends on the selected `strategy`.
-
-    Higher numbers of bits increases the chance to find a match which usually
-    improves compression ratio.
-    It also slows down compression speed and increases memory requirements for
-    compression.
-    This option is ignored for the `ZSTD_fast` `strategy`, which only has the primary hash table.
-
-    The minimum _clog_ is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32-bit platforms
-    and 30 (1B entries / 4 GiB) on 64-bit platforms.
-
-- `searchLog`=_slog_, `slog`=_slog_:
-    Specify the maximum number of searches in a hash chain or a binary tree
-    using logarithmic scale.
-
-    More searches increases the chance to find a match which usually increases
-    compression ratio but decreases compression speed.
-
-    The minimum _slog_ is 1 and the maximum is 'windowLog' - 1.
-
-- `minMatch`=_mml_, `mml`=_mml_:
-    Specify the minimum searched length of a match in a hash table.
-
-    Larger search lengths usually decrease compression ratio but improve
-    decompression speed.
-
-    The minimum _mml_ is 3 and the maximum is 7.
-
-- `targetLength`=_tlen_, `tlen`=_tlen_:
-    The impact of this field vary depending on selected strategy.
-
-    For `ZSTD_btopt`, `ZSTD_btultra` and `ZSTD_btultra2`, it specifies
-    the minimum match length that causes match finder to stop searching.
-    A larger `targetLength` usually improves compression ratio
-    but decreases compression speed.
-
-    For `ZSTD_fast`, it triggers ultra-fast mode when > 0.
-    The value represents the amount of data skipped between match sampling.
-    Impact is reversed: a larger `targetLength` increases compression speed
-    but decreases compression ratio.
-
-    For all other strategies, this field has no impact.
-
-    The minimum _tlen_ is 0 and the maximum is 128 KiB.
-
-- `overlapLog`=_ovlog_,  `ovlog`=_ovlog_:
-    Determine `overlapSize`, amount of data reloaded from previous job.
-    This parameter is only available when multithreading is enabled.
-    Reloading more data improves compression ratio, but decreases speed.
-
-    The minimum _ovlog_ is 0, and the maximum is 9.
-    1 means "no overlap", hence completely independent jobs.
-    9 means "full overlap", meaning up to `windowSize` is reloaded from previous job.
-    Reducing _ovlog_ by 1 reduces the reloaded amount by a factor 2.
-    For example, 8 means "windowSize/2", and 6 means "windowSize/8".
-    Value 0 is special and means "default": _ovlog_ is automatically determined by `zstd`.
-    In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_.
-
-- `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_:
-    Specify the maximum size for a hash table used for long distance matching.
-
-    This option is ignored unless long distance matching is enabled.
-
-    Bigger hash tables usually improve compression ratio at the expense of more
-    memory during compression and a decrease in compression speed.
-
-    The minimum _lhlog_ is 6 and the maximum is 30 (default: 20).
-
-- `ldmMinMatch`=_lmml_, `lmml`=_lmml_:
-    Specify the minimum searched length of a match for long distance matching.
-
-    This option is ignored unless long distance matching is enabled.
-
-    Larger/very small values usually decrease compression ratio.
-
-    The minimum _lmml_ is 4 and the maximum is 4096 (default: 64).
-
-- `ldmBucketSizeLog`=_lblog_, `lblog`=_lblog_:
-    Specify the size of each bucket for the hash table used for long distance
-    matching.
-
-    This option is ignored unless long distance matching is enabled.
-
-    Larger bucket sizes improve collision resolution but decrease compression
-    speed.
-
-    The minimum _lblog_ is 1 and the maximum is 8 (default: 3).
-
-- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_:
-    Specify the frequency of inserting entries into the long distance matching
-    hash table.
-
-    This option is ignored unless long distance matching is enabled.
-
-    Larger values will improve compression speed. Deviating far from the
-    default value will likely result in a decrease in compression ratio.
-
-    The default value is `wlog - lhlog`.
-
-### Example
-The following parameters sets advanced compression options to something
-similar to predefined level 19 for files bigger than 256 KB:
-
-`--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
 
 SEE ALSO
 --------

From ff6713fd72b083ce8a7d1f2a89cd3749ce9f07a8 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 12 Mar 2024 13:09:10 -0700
Subject: [PATCH 248/283] fix duplicated paragraph

reported by @zougloub
---
 programs/zstd.1    |  4 ++--
 programs/zstd.1.md | 12 ++++--------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/programs/zstd.1 b/programs/zstd.1
index 1eb9b97389b..f7af5527071 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -271,10 +271,10 @@ Employing environment variables to set parameters has security implications\. Th
 They can both be overridden by corresponding command line arguments: \fB\-#\fR for compression level and \fB\-T#\fR for number of compression threads\.
 .
 .SH "ADVANCED COMPRESSION OPTIONS"
-zstd provides 22 predefined regular compression levels plus the fast levels\. A compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor (one can see the result of this translation with \-\-show\-default\-cparams)\. These specific parameters can be overridden with advanced compression options\.
+\fBzstd\fR provides 22 predefined regular compression levels plus the fast levels\. A compression level is translated internally into multiple advanced parameters that control the behavior of the compressor (one can observe the result of this translation with \fB\-\-show\-default\-cparams\fR)\. These advanced parameters can be overridden using advanced compression options\.
 .
 .SS "\-\-zstd[=options]:"
-\fBzstd\fR provides 22 predefined regular compression levels plus the fast levels\. This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor\. (You can see the result of this translation with \fB\-\-show\-default\-cparams\fR\.) These specific parameters can be overridden with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
+The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
 .
 .TP
 \fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index f5abb141ad3..fcbfb457301 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -350,16 +350,12 @@ They can both be overridden by corresponding command line arguments:
 
 ADVANCED COMPRESSION OPTIONS
 ----------------------------
-zstd provides 22 predefined regular compression levels plus the fast levels.
-A compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor
-(one can see the result of this translation with --show-default-cparams).
-These specific parameters can be overridden with advanced compression options.
+`zstd` provides 22 predefined regular compression levels plus the fast levels.
+A compression level is translated internally into multiple advanced parameters that control the behavior of the compressor
+(one can observe the result of this translation with `--show-default-cparams`).
+These advanced parameters can be overridden using advanced compression options.
 
 ### --zstd[=options]:
-`zstd` provides 22 predefined regular compression levels plus the fast levels.
-This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor.
-(You can see the result of this translation with `--show-default-cparams`.)
-These specific parameters can be overridden with advanced compression options.
 The _options_ are provided as a comma-separated list.
 You may specify only the options you want to change and the rest will be
 taken from the selected or default compression level.

From f65b9e27ce0b6e4ed096126659021359d004d1ab Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky 
Date: Tue, 12 Mar 2024 17:07:06 -0400
Subject: [PATCH 249/283] Exercise ZSTD_findDecompressedSize() in the simple
 decompression fuzzer (#3959)

* Improve decompression fuzzer

* Fix legacy frame header fuzzer crash, add unit test
---
 lib/decompress/zstd_decompress.c | 9 +++++++++
 tests/fuzz/simple_decompress.c   | 8 +++++++-
 tests/zstreamtest.c              | 9 +++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 17305908540..42636d5780c 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1093,6 +1093,15 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
             decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
             if (ZSTD_isError(decodedSize)) return decodedSize;
 
+            {
+                unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize);
+                RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, "Corrupted frame header!");
+                if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+                    RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected,
+                        "Frame header size does not match decoded size!");
+                }
+            }
+
             assert(decodedSize <= dstCapacity);
             dst = (BYTE*)dst + decodedSize;
             dstCapacity -= decodedSize;
diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c
index ce5f9f0981b..0ee61902cdb 100644
--- a/tests/fuzz/simple_decompress.c
+++ b/tests/fuzz/simple_decompress.c
@@ -37,7 +37,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
     size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
     void *rBuf = FUZZ_malloc(bufSize);
 
-    ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
+    size_t const dSize = ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
+    if (!ZSTD_isError(dSize)) {
+        /* If decompression was successful, the content size from the frame header(s) should be valid. */
+        size_t const expectedSize = ZSTD_findDecompressedSize(src, size);
+        FUZZ_ASSERT(expectedSize != ZSTD_CONTENTSIZE_ERROR);
+        FUZZ_ASSERT(expectedSize == ZSTD_CONTENTSIZE_UNKNOWN || expectedSize == dSize);
+    }
     free(rBuf);
 
     FUZZ_dataProducer_free(producer);
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 82aaf3db50c..7cc4068bc09 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -2408,6 +2408,15 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : Decoder should reject invalid frame header on legacy frames: ", testNb++);
+    {
+        const unsigned char compressed[] = { 0x26,0xb5,0x2f,0xfd,0x50,0x91,0xfd,0xd8,0xb5 };
+        const size_t compressedSize = 9;
+        size_t const dSize = ZSTD_decompress(NULL, 0, compressed, compressedSize);
+        CHECK(!ZSTD_isError(dSize), "must reject when legacy frame header is invalid");
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
 _end:
     FUZ_freeDictionary(dictionary);
     ZSTD_freeCStream(zc);

From b20703f273197589c8c70dd406b81ad601fa9b4a Mon Sep 17 00:00:00 2001
From: Yonatan Komornik <11005061+yoniko@users.noreply.github.com>
Date: Tue, 12 Mar 2024 15:10:07 -0700
Subject: [PATCH 250/283] Updates `ZSTD_RowFindBestMatch` comment (#3947)

Updates the comment on the head of `ZSTD_RowFindBestMatch` to make sure it's aligned with recent changes to the hash table.
---
 lib/compress/zstd_lazy.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 3aba83c6fc3..67dd55fdb80 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1123,18 +1123,18 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGr
 
 /* The high-level approach of the SIMD row based match finder is as follows:
  * - Figure out where to insert the new entry:
- *      - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
- *      - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
+ *      - Generate a hash for the current input position and split it into one byte of tag and `rowHashLog` bits of index.
+ *           - The hash is salted by a value that changes on every context reset, so when the same table is used
+ *             we will avoid collisions that would otherwise slow us down by introducing phantom matches.
+ *      - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
  *        which row to insert into.
- *      - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
- *        be considered as a circular buffer with a "head" index that resides in the tagTable.
- *      - Also insert the "tag" into the equivalent row and position in the tagTable.
- *          - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
- *                  The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
- *                  for alignment/performance reasons, leaving some bytes unused.
- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ *      - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
+ *        be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
+ *        per row).
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
  *   generate a bitfield that we can cycle through to check the collisions in the hash table.
  * - Pick the longest match.
+ * - Insert the tag into the equivalent row and position in the tagTable.
  */
 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR

From 94c102038b81ed89e3b013cb1977496612609f85 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Tue, 12 Mar 2024 12:16:55 -0700
Subject: [PATCH 251/283] [cpu] Backport fix for rbx clobbering on Windows with
 Clang

Backport folly fix for rbx clobbering: https://github.com/facebook/folly/commit/f22f88b8b9d70160388f0f149bc9abaeb82c250b

This supersedes PR #3646.
---
 lib/common/cpu.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/lib/common/cpu.h b/lib/common/cpu.h
index 8bc34a36da2..0e684d9ad8e 100644
--- a/lib/common/cpu.h
+++ b/lib/common/cpu.h
@@ -35,6 +35,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
     U32 f7b = 0;
     U32 f7c = 0;
 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#if !defined(__clang__)
     int reg[4];
     __cpuid((int*)reg, 0);
     {
@@ -50,6 +51,41 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
             f7c = (U32)reg[2];
         }
     }
+#else
+    /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
+     * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
+     * to due to being a reserved register. So in that case, do the `cpuid`
+     * ourselves. Clang supports inline assembly anyway.
+     */
+    U32 n;
+    __asm__(
+        "pushq %%rbx\n\t"
+        "cpuid\n\t"
+        "popq %%rbx\n\t"
+        : "=a"(n)
+        : "a"(0)
+        : "rcx", "rdx");
+    if (n >= 1) {
+      U32 f1a;
+      __asm__(
+          "pushq %%rbx\n\t"
+          "cpuid\n\t"
+          "popq %%rbx\n\t"
+          : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+          : "a"(1)
+          :);
+    }
+    if (n >= 7) {
+      __asm__(
+          "pushq %%rbx\n\t"
+          "cpuid\n\t"
+          "movq %%rbx, %%rax\n\t"
+          "popq %%rbx"
+          : "=a"(f7b), "=c"(f7c)
+          : "a"(7), "c"(0)
+          : "rdx");
+    }
+#endif
 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
     /* The following block like the normal cpuid branch below, but gcc
      * reserves ebx for use of its pic register so we must specially

From c1e995321e9d66a648818f7995999c4fe6d77878 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Tue, 12 Mar 2024 12:38:21 -0700
Subject: [PATCH 252/283] [CI] Run tests with CMake on Windows

Build and run tests on Windows with CMake
---
 .github/workflows/dev-short-tests.yml | 5 +++--
 build/cmake/tests/CMakeLists.txt      | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index 9c36386a13f..b2aaff89cf7 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -272,14 +272,15 @@ jobs:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
     - name: Add MSBuild to PATH
       uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
-    - name: Build
+    - name: Build & Test
       working-directory: ${{env.GITHUB_WORKSPACE}}
       run: |
         cd build\cmake
         mkdir build
         cd build
-        cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} ..
+        cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZSTREAM_FLAGS=-T30s -DZSTD_FUZZER_FLAGS=-T30s -DZSTD_FULLBENCH_FLAGS=-i0 ..
         cmake.exe --build .
+        ctest.exe -V -C Debug
 
   msbuild-visual-studio:
     strategy:
diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt
index 3ead070102f..56104a4e341 100644
--- a/build/cmake/tests/CMakeLists.txt
+++ b/build/cmake/tests/CMakeLists.txt
@@ -61,7 +61,7 @@ if (NOT MSVC)
     target_compile_options(fullbench PRIVATE "-Wno-deprecated-declarations")
 endif()
 target_link_libraries(fullbench libzstd_static)
-add_test(NAME fullbench COMMAND fullbench ${ZSTD_FULLBENCH_FLAGS})
+add_test(NAME fullbench COMMAND "$<TARGET_FILE:fullbench>" ${ZSTD_FULLBENCH_FLAGS})
 
 #
 # fuzzer
@@ -73,7 +73,7 @@ endif()
 target_link_libraries(fuzzer libzstd_static)
 AddTestFlagsOption(ZSTD_FUZZER_FLAGS "$ENV{FUZZERTEST} $ENV{FUZZER_FLAGS}"
     "Semicolon-separated list of flags to pass to the fuzzer test (see `fuzzer -h` for usage)")
-add_test(NAME fuzzer COMMAND fuzzer ${ZSTD_FUZZER_FLAGS})
+add_test(NAME fuzzer COMMAND "$<TARGET_FILE:fuzzer>" ${ZSTD_FUZZER_FLAGS})
 # Disable the timeout since the run time is too long for the default timeout of
 # 1500 seconds and varies considerably between low-end and high-end CPUs.
 # set_tests_properties(fuzzer PROPERTIES TIMEOUT 0)
@@ -88,7 +88,7 @@ endif()
 target_link_libraries(zstreamtest libzstd_static)
 AddTestFlagsOption(ZSTD_ZSTREAM_FLAGS "$ENV{ZSTREAM_TESTTIME} $ENV{FUZZER_FLAGS}"
     "Semicolon-separated list of flags to pass to the zstreamtest test (see `zstreamtest -h` for usage)")
-add_test(NAME zstreamtest COMMAND zstreamtest ${ZSTD_ZSTREAM_FLAGS})
+add_test(NAME zstreamtest COMMAND "$<TARGET_FILE:zstreamtest>" ${ZSTD_ZSTREAM_FLAGS})
 
 #
 # playTests.sh

From c8ab027227536a543efd1b7bea04aabf9e97accf Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Wed, 13 Mar 2024 11:29:28 -0700
Subject: [PATCH 253/283] reduce the amount of includes in "cover.h"

---
 lib/compress/fse_compress.c |  2 +-
 lib/dictBuilder/cover.c     | 24 ++++++++++++++----------
 lib/dictBuilder/cover.h     | 10 ++--------
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index 158ba80ca94..1ce3cf16ac1 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -25,7 +25,7 @@
 #include "../common/error_private.h"
 #define ZSTD_DEPS_NEED_MALLOC
 #define ZSTD_DEPS_NEED_MATH64
-#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+#include "../common/zstd_deps.h"  /* ZSTD_memset */
 #include "../common/bits.h" /* ZSTD_highbit32 */
 
 
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index e7fcfd2099b..44f9029acd9 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -31,8 +31,8 @@
 #endif
 
 #include "../common/mem.h" /* read */
-#include "../common/pool.h"
-#include "../common/threading.h"
+#include "../common/pool.h" /* POOL_ctx */
+#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
 #include "../common/zstd_internal.h" /* includes zstd.h */
 #include "../common/bits.h" /* ZSTD_highbit32 */
 #include "../zdict.h"
@@ -78,7 +78,7 @@ static clock_t g_time = 0;
 #undef  LOCALDISPLAYUPDATE
 #define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
   if (displayLevel >= l) {                                                     \
-    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) {             \
+    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) {           \
       g_time = clock();                                                        \
       DISPLAY(__VA_ARGS__);                                                    \
     }                                                                          \
@@ -301,9 +301,10 @@ static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
  * Returns the first pointer in [first, last) whose element does not compare
  * less than value.  If no such element exists it returns last.
  */
-static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
+static const size_t *COVER_lower_bound(const size_t* first, const size_t* last,
                                        size_t value) {
-  size_t count = last - first;
+  size_t count = (size_t)(last - first);
+  assert(last >= first);
   while (count != 0) {
     size_t step = count / 2;
     const size_t *ptr = first;
@@ -549,7 +550,8 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
  */
 static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
                           const size_t *samplesSizes, unsigned nbSamples,
-                          unsigned d, double splitPoint) {
+                          unsigned d, double splitPoint)
+{
   const BYTE *const samples = (const BYTE *)samplesBuffer;
   const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
   /* Split samples into testing and training sets */
@@ -907,8 +909,10 @@ void COVER_best_start(COVER_best_t *best) {
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
  * If this dictionary is the best so far save it and its parameters.
  */
-void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
-                              COVER_dictSelection_t selection) {
+void COVER_best_finish(COVER_best_t* best,
+                      ZDICT_cover_params_t parameters,
+                      COVER_dictSelection_t selection)
+{
   void* dict = selection.dictContent;
   size_t compressedSize = selection.totalCompressedSize;
   size_t dictSize = selection.dictSize;
@@ -980,8 +984,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
   size_t largestCompressed = 0;
   BYTE* customDictContentEnd = customDictContent + dictContentSize;
 
-  BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
-  BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
+  BYTE* largestDictbuffer = (BYTE*)malloc(dictBufferCapacity);
+  BYTE* candidateDictBuffer = (BYTE*)malloc(dictBufferCapacity);
   double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
 
   if (!largestDictbuffer || !candidateDictBuffer) {
diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h
index 252624bdeb5..a5d7506ef6d 100644
--- a/lib/dictBuilder/cover.h
+++ b/lib/dictBuilder/cover.h
@@ -12,14 +12,8 @@
 #  define ZDICT_STATIC_LINKING_ONLY
 #endif
 
-#include <stdio.h>  /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h>   /* clock */
-#include "../common/mem.h" /* read */
-#include "../common/pool.h"
-#include "../common/threading.h"
-#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
+#include "../common/mem.h"   /* U32, BYTE */
 #include "../zdict.h"
 
 /**

From ff0afbad58611d22b8b4477e9383b9b9ffdbaee6 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Wed, 13 Mar 2024 09:58:34 -0700
Subject: [PATCH 254/283] [asm][aarch64] Mark that BTI and PAC are supported

Mark that `huf_decompress_amd64.S` supports BTI and PAC, which it trivially does because it is empty for aarch64.

The issue only requested BTI markings, but it also makes sense to mark PAC, which is the only other feature.

Also add a test for this mode to the ARM64 QEMU test. Before this PR it warns on `huf_decompress_amd64.S`, after it doesn't.

Fixes Issue #3841.
---
 .github/workflows/dev-short-tests.yml |  1 +
 lib/decompress/huf_decompress_amd64.S | 23 ++++++++++++++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index b2aaff89cf7..5324b38d9ac 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -409,6 +409,7 @@ jobs:
     - name: ARM64
       if: ${{ matrix.name == 'ARM64' }}
       run: |
+        LDFLAGS="-static -z force-bti" MOREFLAGS="-mbranch-protection=standard" CC=$XCC QEMU_SYS=$XEMU make clean check
         LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
     - name: PPC
       if: ${{ matrix.name == 'PPC' }}
diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S
index 3b96b44612f..78da291ee3c 100644
--- a/lib/decompress/huf_decompress_amd64.S
+++ b/lib/decompress/huf_decompress_amd64.S
@@ -10,11 +10,32 @@
 
 #include "../common/portability_macros.h"
 
+#if defined(__ELF__) && defined(__GNUC__)
 /* Stack marking
  * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
  */
-#if defined(__ELF__) && defined(__GNUC__)
 .section .note.GNU-stack,"",%progbits
+
+#if defined(__aarch64__)
+/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64.
+ * See: https://github.com/facebook/zstd/issues/3841
+ * See: https://gcc.godbolt.org/z/sqr5T4ffK
+ * See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/
+ * See: https://reviews.llvm.org/D62609
+ */
+.pushsection .note.gnu.property, "a"
+.p2align 3
+.long 4                 /* size of the name - "GNU\0" */
+.long 0x10              /* size of descriptor */
+.long 0x5               /* NT_GNU_PROPERTY_TYPE_0 */
+.asciz "GNU"
+.long 0xc0000000        /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */
+.long 4                 /* pr_datasz - 4 bytes */
+.long 3                 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */
+.p2align 3              /* pr_padding - bring everything to 8 byte alignment */
+.popsection
+#endif
+
 #endif
 
 #if ZSTD_ENABLE_ASM_X86_64_BMI2

From d6ee2d5d2454f5023c78d59e7464c9c902d6597b Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" 
Date: Tue, 12 Mar 2024 14:01:17 -0700
Subject: [PATCH 255/283] Use `utimensat()` on FreeBSD

FreeBSD only claims to support POSIX 2001 [0]. But they do in fact support
`utimensat()`. This adds a specific check to opt them in to using it. This
value was selected by consulting [1].

See discussion on #3952.

Further addresses #3748.

[0] https://github.com/freebsd/freebsd-src/blob/937a0055858a098027f464abf0b2b1ec5d36748f/sys/sys/unistd.h#L96

[1] https://docs.freebsd.org/en/books/porters-handbook/versions/
---
 programs/util.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/programs/util.c b/programs/util.c
index 862fc80080d..116f88e103b 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -23,16 +23,27 @@ extern "C" {
 #include <errno.h>
 #include <assert.h>
 
+#if defined(__FreeBSD__)
+#include <sys/param.h> /* __FreeBSD_version */
+#endif /* #ifdef __FreeBSD__ */
+
 #if defined(_WIN32)
 #  include <sys/utime.h>  /* utime */
 #  include <io.h>         /* _chmod */
+#  define ZSTD_USE_UTIMENSAT 0
 #else
 #  include <unistd.h>     /* chown, stat */
-#  if PLATFORM_POSIX_VERSION < 200809L || !defined(st_mtime)
-#    include <utime.h>    /* utime */
+#  include <sys/stat.h>   /* utimensat, st_mtime */
+#  if (PLATFORM_POSIX_VERSION >= 200809L && defined(st_mtime)) \
+      || (defined(__FreeBSD__) && __FreeBSD_version >= 1100056)
+#    define ZSTD_USE_UTIMENSAT 1
 #  else
+#    define ZSTD_USE_UTIMENSAT 0
+#  endif
+#  if ZSTD_USE_UTIMENSAT
 #    include <fcntl.h>    /* AT_FDCWD */
-#    include <sys/stat.h> /* utimensat */
+#  else
+#    include <utime.h>    /* utime */
 #  endif
 #endif
 
@@ -259,7 +270,12 @@ int UTIL_utime(const char* filename, const stat_t *statbuf)
      * that struct stat has a struct timespec st_mtim member. We need this
      * check because there are some platforms that claim to be POSIX 2008
      * compliant but which do not have st_mtim... */
-#if (PLATFORM_POSIX_VERSION >= 200809L) && defined(st_mtime)
+    /* FreeBSD has implemented POSIX 2008 for a long time but still only
+     * advertises support for POSIX 2001. They have a version macro that
+     * lets us safely gate them in.
+     * See https://docs.freebsd.org/en/books/porters-handbook/versions/.
+     */
+#if ZSTD_USE_UTIMENSAT
     {
         /* (atime, mtime) */
         struct timespec timebuf[2] = { {0, UTIME_NOW} };
@@ -1546,7 +1562,6 @@ int UTIL_countCores(int logical)
 
 #elif defined(__FreeBSD__)
 
-#include <sys/param.h>
 #include <sys/sysctl.h>
 
 /* Use physical core sysctl when available

From 2215101cad9d809e345e2e939fa1e125563d25cf Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" 
Date: Wed, 13 Mar 2024 16:32:04 -0400
Subject: [PATCH 256/283] Add a Few Tests

---
 .../file-handling/directory-mirror.sh         | 49 +++++++++++++++++++
 .../directory-mirror.sh.stderr.exact          |  0
 .../directory-mirror.sh.stdout.exact          |  0
 3 files changed, 49 insertions(+)
 create mode 100755 tests/cli-tests/file-handling/directory-mirror.sh
 create mode 100644 tests/cli-tests/file-handling/directory-mirror.sh.stderr.exact
 create mode 100644 tests/cli-tests/file-handling/directory-mirror.sh.stdout.exact

diff --git a/tests/cli-tests/file-handling/directory-mirror.sh b/tests/cli-tests/file-handling/directory-mirror.sh
new file mode 100755
index 00000000000..b2f70b59126
--- /dev/null
+++ b/tests/cli-tests/file-handling/directory-mirror.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+set -e
+
+# setup
+mkdir -p src/.hidden src/dir
+mkdir mid dst
+
+echo "file1" > src/file1
+echo "file2" > src/.file2
+echo "file3" > src/.hidden/.file3
+echo "file4" > src/dir/.file4
+
+# relative paths
+zstd -q -r --output-dir-mirror mid/ src/
+zstd -q -d -r --output-dir-mirror dst/ mid/src/
+
+diff --brief --recursive --new-file src/ dst/mid/src/
+
+# reset
+rm -rf mid dst
+mkdir mid dst
+
+# from inside the directory
+(cd src; zstd -q -r --output-dir-mirror ../mid/ ./)
+(cd mid; zstd -q -d -r --output-dir-mirror ../dst/ ./)
+
+diff --brief --recursive --new-file src/ dst/
+
+# reset
+rm -rf mid dst
+mkdir mid dst
+
+# absolute paths
+export BASE_PATH="$(pwd)"
+
+zstd -q -r --output-dir-mirror mid/ "${BASE_PATH}/src/"
+zstd -q -d -r --output-dir-mirror  dst/ "${BASE_PATH}/mid/${BASE_PATH}/src/"
+
+diff --brief --recursive --new-file src/ "dst/${BASE_PATH}/mid/${BASE_PATH}/src/"
+
+# reset
+rm -rf mid dst
+mkdir mid dst
+
+# dots
+zstd -q -r --output-dir-mirror mid/ ./src/./
+zstd -q -d -r --output-dir-mirror  dst/ ./mid/./src/./
+
+diff --brief --recursive --new-file src/ dst/mid/src/
diff --git a/tests/cli-tests/file-handling/directory-mirror.sh.stderr.exact b/tests/cli-tests/file-handling/directory-mirror.sh.stderr.exact
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/cli-tests/file-handling/directory-mirror.sh.stdout.exact b/tests/cli-tests/file-handling/directory-mirror.sh.stdout.exact
new file mode 100644
index 00000000000..e69de29bb2d

From 86b8e39a84d15ebcae3fa4b36240db27f2ae74ac Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" 
Date: Wed, 13 Mar 2024 16:33:30 -0400
Subject: [PATCH 257/283] Remove Erroneous Exclusion of Hidden Files and
 Folders in `--output-dir-mirror`

---
 programs/util.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/programs/util.c b/programs/util.c
index 862fc80080d..d6f33d4a877 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -1119,9 +1119,6 @@ static char* mallocAndJoin2Dir(const char *dir1, const char *dir2)
         memcpy(outDirBuffer, dir1, dir1Size);
         outDirBuffer[dir1Size] = '\0';
 
-        if (dir2[0] == '.')
-            return outDirBuffer;
-
         buffer = outDirBuffer + dir1Size;
         if (dir1Size > 0 && *(buffer - 1) != PATH_SEP) {
             *buffer = PATH_SEP;

From 3613448fb8623361dc6bd9b32c8d4b3d2da85823 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" 
Date: Wed, 13 Mar 2024 16:56:54 -0400
Subject: [PATCH 258/283] Promote `ZSTD_c_targetCBlockSize` Parameter to Stable
 API

This feature has demonstrated itself to be useful in web compression and we
want to encourage other folks to use it. But we currently make it difficult
to do so since it's locked away in the experimental API.

The API itself is really straightforward and I think it's fine to commit to
maintaining support / compatibility for this API even if in the future the
underlying implementation may continue to evolve.

Note that this commit changes its enum name and also its numeric value. Users
who respected the instructions of using the experimental API should be fine
with both of these changes since they should only have referred to it by the `ZSTD_c_targetCBlockSize` macro.

Conceivably someone could have done bad feature detection of this capability
by doing `#ifdef ZSTD_c_targetCBlockSize` which will now return false since
it's no longer a macro... but I think that's an acceptable hypothetical
breakage.
---
 lib/zstd.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/lib/zstd.h b/lib/zstd.h
index 84126930520..d2621c5c957 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -390,6 +390,16 @@ typedef enum {
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
                               * Special: value 0 means "use default strategy". */
+    ZSTD_c_targetCBlockSize=130, /* Tries to fit compressed block size to be
+                                  * around targetCBlockSize. No target when
+                                  * targetCBlockSize == 0. There is no guarantee
+                                  * on compressed block size (default:0).
+                                  * Since the decoder has to buffer a complete
+                                  * block to begin decoding it, in low band-
+                                  * width streaming environments this may
+                                  * improve end-to-end latency. Bound by
+                                  * ZSTD_TARGETCBLOCKSIZE_MIN and
+                                  * ZSTD_TARGETCBLOCKSIZE_MAX. */
     /* LDM mode parameters */
     ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
                                      * This parameter is designed to improve compression ratio
@@ -469,7 +479,6 @@ typedef enum {
      * ZSTD_c_forceMaxWindow
      * ZSTD_c_forceAttachDict
      * ZSTD_c_literalCompressionMode
-     * ZSTD_c_targetCBlockSize
      * ZSTD_c_srcSizeHint
      * ZSTD_c_enableDedicatedDictSearch
      * ZSTD_c_stableInBuffer
@@ -490,7 +499,7 @@ typedef enum {
      ZSTD_c_experimentalParam3=1000,
      ZSTD_c_experimentalParam4=1001,
      ZSTD_c_experimentalParam5=1002,
-     ZSTD_c_experimentalParam6=1003,
+     /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
      ZSTD_c_experimentalParam7=1004,
      ZSTD_c_experimentalParam8=1005,
      ZSTD_c_experimentalParam9=1006,
@@ -1951,11 +1960,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
  */
 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
 
-/* Tries to fit compressed block size to be around targetCBlockSize.
- * No target when targetCBlockSize == 0.
- * There is no guarantee on compressed block size (default:0) */
-#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
-
 /* User's best guess of source size.
  * Hint is not valid when srcSizeHint == 0.
  * There is no guarantee that hint is close to actual source size,

From a0a9bc6c95436c85002ffca972ae545f862e1638 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Thu, 14 Mar 2024 08:47:04 -0700
Subject: [PATCH 259/283] [cmake] Always create libzstd target

If both `ZSTD_BUILD_SHARED` and `ZSTD_BUILD_STATIC` are set, then cmake exports the libraries `libzstd_shared` and `libzstd_static` only.
It does not export `libzstd`, which is only exported when exactly one of `ZSTD_BUILD_SHARED` and `ZSTD_BUILD_STATIC` is set.
This PR exports `libzstd` in that case, based on the value of the standard CMake variable [`BUILD_SHARED_LIBS`](https://cmake.org/cmake/help/latest/variable/BUILD_SHARED_LIBS.html).
This ensures that `libzstd` can always be used to refer to the exported zstd library, since the build errors out if neither `ZSTD_BUILD_SHARED` nor `ZSTD_BUILD_STATIC` is set.

I tested all the possible combinations of `ZSTD_BUILD_SHARED`, `ZSTD_BUILD_STATIC`, and `BUILD_SHARED_LIBS` and they always worked as expected:
* If exactly one of `ZSTD_BUILD_SHARED` and `ZSTD_BUILD_STATIC` is set, that is used as `libzstd`.
* Otherwise, libzstd is set based on `BUILD_SHARED_LIBS`.

Fixes #3859.
---
 build/cmake/lib/CMakeLists.txt | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index f5820af572b..53df541ff6f 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -159,6 +159,20 @@ if (ZSTD_BUILD_STATIC AND NOT ZSTD_BUILD_SHARED)
     target_link_libraries(libzstd INTERFACE libzstd_static)
     list(APPEND library_targets libzstd)
 endif ()
+if (ZSTD_BUILD_SHARED AND ZSTD_BUILD_STATIC)
+    # If both ZSTD_BUILD_SHARED and ZSTD_BUILD_STATIC are set, which is the
+    # default, fall back to BUILD_SHARED_LIBS to decide whether the libzstd
+    # INTERFACE target links to libzstd_shared or libzstd_static.
+    if (BUILD_SHARED_LIBS)
+        add_library(libzstd INTERFACE)
+        target_link_libraries(libzstd INTERFACE libzstd_shared)
+        list(APPEND library_targets libzstd)
+    else ()
+        add_library(libzstd INTERFACE)
+        target_link_libraries(libzstd INTERFACE libzstd_static)
+        list(APPEND library_targets libzstd)
+    endif ()
+endif ()
 
 # Add specific compile definitions for MSVC project
 if (MSVC)

From 9cc3304614f9ea28a870f9e94e1e449c6d7de1fc Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 14 Mar 2024 12:11:11 -0700
Subject: [PATCH 260/283] add line number to debug traces

---
 lib/common/debug.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/common/debug.h b/lib/common/debug.h
index 8049e4b5d89..a16b69e5743 100644
--- a/lib/common/debug.h
+++ b/lib/common/debug.h
@@ -92,10 +92,14 @@ extern int g_debuglevel; /* the variable is only declared,
         }                                  \
     } while (0)
 
+#define STRINGIFY(x) #x                    /* turns the argument, as written, into a string literal */
+#define TOSTRING(x) STRINGIFY(x)           /* extra level so x is macro-expanded before stringification */
+#define LINE_AS_STRING TOSTRING(__LINE__)  /* current source line number as a string literal */
+
 #  define DEBUGLOG(l, ...)                               \
     do {                                                 \
         if (l<=g_debuglevel) {                           \
-            ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
+            ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
             ZSTD_DEBUG_PRINT(" \n");                     \
         }                                                \
     } while (0)

From 559762da12f54712d44f619098aa4a7e7bc5727b Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky 
Date: Thu, 14 Mar 2024 15:55:01 -0400
Subject: [PATCH 261/283] Remove duplicate and incorrect docs in
 zstd_decompress.c (#3967)

---
 lib/decompress/zstd_decompress.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 42636d5780c..f6579743859 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -797,10 +797,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
 }
 
 /** ZSTD_findFrameCompressedSize() :
- *  compatible with legacy mode
- *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
- *  `srcSize` must be at least as large as the frame contained
- *  @return : the compressed size of the frame starting at `src` */
+ * See docs in zstd.h
+ * Note: compatible with legacy mode */
 size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
 {
     ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);

From 686e7e4b4b3821df4de0e7dd4722049ee2c5fb88 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 14 Mar 2024 15:38:14 -0700
Subject: [PATCH 262/283] updated version to v1.5.6

---
 doc/zstd_manual.html |  4 ++--
 lib/zstd.h           |  2 +-
 programs/zstd.1      |  2 +-
 programs/zstdgrep.1  | 13 +++++++++++--
 programs/zstdless.1  |  9 +++++++--
 5 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index c81627d8768..58fe958dc0b 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
 
 
 
-zstd 1.5.5 Manual
+zstd 1.5.6 Manual
 
 
-

zstd 1.5.5 Manual

+

zstd 1.5.6 Manual


Contents

    diff --git a/lib/zstd.h b/lib/zstd.h index 84126930520..79432eb884a 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -106,7 +106,7 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 5 -#define ZSTD_VERSION_RELEASE 5 +#define ZSTD_VERSION_RELEASE 6 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) /*! ZSTD_versionNumber() : diff --git a/programs/zstd.1 b/programs/zstd.1 index f7af5527071..2b5a98511f0 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -1,5 +1,5 @@ . -.TH "ZSTD" "1" "March 2024" "zstd 1.5.5" "User Commands" +.TH "ZSTD" "1" "March 2024" "zstd 1.5.6" "User Commands" . .SH "NAME" \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1 index 1204e5bb6fb..d7fda583398 100644 --- a/programs/zstdgrep.1 +++ b/programs/zstdgrep.1 @@ -1,17 +1,26 @@ -.TH "ZSTDGREP" "1" "March 2023" "zstd 1.5.5" "User Commands" +. +.TH "ZSTDGREP" "1" "March 2024" "zstd 1.5.6" "User Commands" +. .SH "NAME" \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files +. .SH "SYNOPSIS" -\fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \|\.\|\.\|\.] +\fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \.\.\.] +. .SH "DESCRIPTION" \fBzstdgrep\fR runs \fBgrep\fR(1) on files, or \fBstdin\fR if no files argument is given, after decompressing them with \fBzstdcat\fR(1)\. +. .P The \fIgrep\-flags\fR and \fIpattern\fR arguments are passed on to \fBgrep\fR(1)\. If an \fB\-e\fR flag is found in the \fIgrep\-flags\fR, \fBzstdgrep\fR will not look for a \fIpattern\fR argument\. +. .P Note that modern \fBgrep\fR alternatives such as \fBripgrep\fR (\fBrg\fR(1)) support \fBzstd\fR\-compressed files out of the box, and can prove better alternatives than \fBzstdgrep\fR notably for unsupported complex pattern searches\. 
Note though that such alternatives may also feature some minor command line differences\. +. .SH "EXIT STATUS" In case of missing arguments or missing pattern, 1 will be returned, otherwise 0\. +. .SH "SEE ALSO" \fBzstd\fR(1) +. .SH "AUTHORS" Thomas Klausner \fIwiz@NetBSD\.org\fR diff --git a/programs/zstdless.1 b/programs/zstdless.1 index bc019b26365..7dd65f8fc44 100644 --- a/programs/zstdless.1 +++ b/programs/zstdless.1 @@ -1,9 +1,14 @@ -.TH "ZSTDLESS" "1" "March 2023" "zstd 1.5.5" "User Commands" +. +.TH "ZSTDLESS" "1" "March 2024" "zstd 1.5.6" "User Commands" +. .SH "NAME" \fBzstdless\fR \- view zstandard\-compressed files +. .SH "SYNOPSIS" -\fBzstdless\fR [\fIflags\fR] [\fIfile\fR \|\.\|\.\|\.] +\fBzstdless\fR [\fIflags\fR] [\fIfile\fR \.\.\.] +. .SH "DESCRIPTION" \fBzstdless\fR runs \fBless\fR(1) on files or stdin, if no \fIfile\fR argument is given, after decompressing them with \fBzstdcat\fR(1)\. +. .SH "SEE ALSO" \fBzstd\fR(1) From 351498b9320e9c03cbe4ed722e8967a5673f46a0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 14 Mar 2024 17:01:34 -0700 Subject: [PATCH 263/283] update CHANGELOG for v1.5.6 --- CHANGELOG | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c7a7506ee31..23c0128203f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,38 @@ +V1.5.6 (Mar 2024) +api: Promote `ZSTD_c_targetCBlockSize` to Stable API by @felixhandte +api: new `ZSTD_d_maxBlockSize` experimental parameter, to reduce streaming decompression memory, by @terrelln +perf: improve performance of param `ZSTD_c_targetCBlockSize`, by @Cyan4973 +perf: improved compression of arrays of integers at high compression, by @Cyan4973 +lib: reduce binary size with selective built-time exclusion, by @felixhandte +lib: improved huffman speed on small data and linux kernel, by @terrelln +lib: accept dictionaries with partial literal tables, by @terrelln +lib: fix CCtx size estimation with external sequence 
producer, by @embg +lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz +lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt +cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit +cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973 +cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte +cli: improved time accuracy on BSD, by @felixhandte +cli: better errors on argument parsing, by @KapJI +tests: better compatibility with older versions of `grep`, by @Cyan4973 +tests: lorem ipsum generator as default backup content, by @Cyan4973 +build: cmake improvements by @terrelln, @sighingnow, @gjasny, @JohanMabille, @Saverio976, @gruenich, @teo-tsirpanis +build: bazel support, by @jondo2010 +build: fix cross-compiling for AArch64 with lld by @jcelerier +build: fix Apple platform compatibility, by @nidhijaju +build: fix Visual 2012 and lower compatibility, by @Cyan4973 +build: improve win32 support, by @DimitriPapadopoulos +build: better C90 compliance for zlibWrapper, by @emaste +port: make: fat binaries on macos, by @mredig +port: ARM64EC compatibility for Windows, by @dunhor +port: QNX support by @klausholstjacobsen +port: MSYS2 and Cygwin makefile installation and test support, by @QBos07 +port: risc-v support validation in CI, by @Cyan4973 +port: sparc64 support validation in CI, by @Cyan4973 +port: AIX compatibility, by @likema +port: HP-UX compatibility, by @likema +doc: Improved specification accuracy, by @elasota + v1.5.5 (Apr 2023) fix: fix rare corruption bug affecting the high compression mode, reported by @danlark1 (#3517, @terrelln) perf: improve mid-level compression speed (#3529, #3533, #3543, @yoniko and #3552, @terrelln) @@ -98,7 +133,7 @@ build: support for m68k (Motorola 68000's), by @cyan4973 build: improved AIX support, by @Helflym build: improved meson unofficial build, by @eli-schwartz cli : custom memory limit when training dictionary (#2925), by 
@embg -cli : report advanced parameters information when compressing in very verbose mode (``-vv`), by @Svetlitski-FB +cli : report advanced parameters information when compressing in very verbose mode (`-vv`), by @Svetlitski-FB v1.5.0 (May 11, 2021) api: Various functions promoted from experimental to stable API: (#2579-2581, @senhuang42) @@ -165,7 +200,7 @@ api: Add Function to Generate Skippable Frame (#2439, @senhuang42) perf: New Algorithms for the Long Distance Matcher (#2483, @mpu) perf: Performance Improvements for Long Distance Matcher (#2464, @mpu) perf: Don't Shrink Window Log when Streaming with a Dictionary (#2451, @terrelln) -cli: Fix `--output-dir-mirror`'s Rejection of `..`-Containing Paths (#2512, @felixhandte) +cli: Fix `--output-dir-mirror` rejection of `..` -containing paths (#2512, @felixhandte) cli: Allow Input From Console When `-f`/`--force` is Passed (#2466, @felixhandte) cli: Improve Help Message (#2500, @senhuang42) tests: Remove Flaky Tests (#2455, #2486, #2445, @Cyan4973) From 88301b58c1b2f84e55f27fd7259db4f8afdafc22 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 05:37:52 +0000 Subject: [PATCH 264/283] Bump actions/cache from 3 to 4 Bumps [actions/cache](https://github.com/actions/cache) from 3 to 4. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/commit.yml | 2 +- .github/workflows/nightly.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml index 83a6412b932..5fc8cb1fd5e 100644 --- a/.github/workflows/commit.yml +++ b/.github/workflows/commit.yml @@ -59,7 +59,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: restore_cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0 path: tests/regression/cache diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 0eb9ecbde67..9206a07fd50 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -18,7 +18,7 @@ jobs: CIRCLE_ARTIFACTS: "/tmp/circleci-artifacts" steps: - uses: actions/checkout@v4 - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0 path: tests/regression/cache From 9dca0602f45e925c919ac130c9c9f37d88d4ab98 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 05:37:56 +0000 Subject: [PATCH 265/283] Bump github/codeql-action from 3.24.6 to 3.24.7 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.24.6 to 3.24.7. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/8a470fddafa5cbb6266ee11b37ef4d8aae19c571...3ab4101902695724f9365a384f86c1074d94e18c) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 513320271de..a5d5f02a34c 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@8a470fddafa5cbb6266ee11b37ef4d8aae19c571 # tag=v3.24.6 + uses: github/codeql-action/upload-sarif@3ab4101902695724f9365a384f86c1074d94e18c # tag=v3.24.7 with: sarif_file: results.sarif From 79cd0ff7120ed05ac9e52ba4c7a484752be4d758 Mon Sep 17 00:00:00 2001 From: dsvi <23555438+dsvi@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:47:46 +0300 Subject: [PATCH 266/283] Makes it possible to use the lib through FetchContent or ExternalProject_Add --- build/cmake/lib/CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 53df541ff6f..36585969039 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -33,9 +33,6 @@ if(NOT ZSTD_BUILD_SHARED AND NOT ZSTD_BUILD_STATIC) message(SEND_ERROR "You need to build at least one flavor of libzstd") endif() -# Define library directory, where sources and header files are located -include_directories(${LIBRARY_DIR} ${LIBRARY_DIR}/common) - file(GLOB CommonSources ${LIBRARY_DIR}/common/*.c) file(GLOB CompressSources ${LIBRARY_DIR}/compress/*.c) file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c) @@ -119,10 +116,14 @@ macro (add_definition target var) endif () endmacro () +# Define include directories, where header files are located +set(LIBRARY_INCLUDES "${LIBRARY_DIR} ${LIBRARY_DIR}/common") + # Split project to static and shared libraries build set(library_targets) if (ZSTD_BUILD_SHARED) add_library(libzstd_shared SHARED ${Sources} 
${Headers} ${PlatformDependResources}) + target_include_directories(libzstd_shared PUBLIC $) list(APPEND library_targets libzstd_shared) if (ZSTD_MULTITHREAD_SUPPORT) set_property(TARGET libzstd_shared APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_MULTITHREAD") @@ -136,6 +137,7 @@ if (ZSTD_BUILD_SHARED) endif () if (ZSTD_BUILD_STATIC) add_library(libzstd_static STATIC ${Sources} ${Headers}) + target_include_directories(libzstd_static PUBLIC $) list(APPEND library_targets libzstd_static) if (ZSTD_MULTITHREAD_SUPPORT) set_property(TARGET libzstd_static APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_MULTITHREAD") From a595e5812a5c7e4ac47839383f931fb8000623f0 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 14 Mar 2024 12:12:55 -0700 Subject: [PATCH 267/283] [cmake] Fix up PR #3716 * Make a variable `PublicHeaders` for Zstd's public headers * Add `PublicHeaders` to `Headers`, which was missing * Only export `${LIBRARY_DIR}` publicly, not `common/` * Switch the `target_include_directories()` to `INTERFACE` because zstd uses relative includes internally, so doesn't need any include directories to build * Switch installation to use the `PublicHeaders` variable, and test that the right headers are installed --- build/cmake/lib/CMakeLists.txt | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 36585969039..eb21b8b3259 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -48,6 +48,7 @@ endif () file(GLOB DictBuilderSources ${LIBRARY_DIR}/dictBuilder/*.c) file(GLOB DeprecatedSources ${LIBRARY_DIR}/deprecated/*.c) +file(GLOB PublicHeaders ${LIBRARY_DIR}/*.h) file(GLOB CommonHeaders ${LIBRARY_DIR}/common/*.h) file(GLOB CompressHeaders ${LIBRARY_DIR}/compress/*.h) file(GLOB DecompressHeaders ${LIBRARY_DIR}/decompress/*.h) @@ -55,7 +56,7 @@ file(GLOB DictBuilderHeaders ${LIBRARY_DIR}/dictBuilder/*.h) file(GLOB DeprecatedHeaders 
${LIBRARY_DIR}/deprecated/*.h) set(Sources ${CommonSources}) -set(Headers ${LIBRARY_DIR}/zstd.h ${CommonHeaders}) +set(Headers ${PublicHeaders} ${CommonHeaders}) if (ZSTD_BUILD_COMPRESSION) set(Sources ${Sources} ${CompressSources}) set(Headers ${Headers} ${CompressHeaders}) @@ -75,7 +76,6 @@ endif() if (ZSTD_LEGACY_SUPPORT) set(LIBRARY_LEGACY_DIR ${LIBRARY_DIR}/legacy) - include_directories(${LIBRARY_LEGACY_DIR}) set(Sources ${Sources} ${LIBRARY_LEGACY_DIR}/zstd_v01.c @@ -116,14 +116,14 @@ macro (add_definition target var) endif () endmacro () -# Define include directories, where header files are located -set(LIBRARY_INCLUDES "${LIBRARY_DIR} ${LIBRARY_DIR}/common") +# Define directories containing the library's public headers +set(PUBLIC_INCLUDE_DIRS ${LIBRARY_DIR}) # Split project to static and shared libraries build set(library_targets) if (ZSTD_BUILD_SHARED) add_library(libzstd_shared SHARED ${Sources} ${Headers} ${PlatformDependResources}) - target_include_directories(libzstd_shared PUBLIC $) + target_include_directories(libzstd_shared INTERFACE $) list(APPEND library_targets libzstd_shared) if (ZSTD_MULTITHREAD_SUPPORT) set_property(TARGET libzstd_shared APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_MULTITHREAD") @@ -137,7 +137,7 @@ if (ZSTD_BUILD_SHARED) endif () if (ZSTD_BUILD_STATIC) add_library(libzstd_static STATIC ${Sources} ${Headers}) - target_include_directories(libzstd_static PUBLIC $) + target_include_directories(libzstd_static INTERFACE $) list(APPEND library_targets libzstd_static) if (ZSTD_MULTITHREAD_SUPPORT) set_property(TARGET libzstd_static APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_MULTITHREAD") @@ -224,11 +224,7 @@ configure_file("${LIBRARY_DIR}/libzstd.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/libzs install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libzstd.pc" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") # install target -install(FILES - "${LIBRARY_DIR}/zstd.h" - "${LIBRARY_DIR}/zdict.h" - "${LIBRARY_DIR}/zstd_errors.h" - DESTINATION 
"${CMAKE_INSTALL_INCLUDEDIR}") +install(FILES ${PublicHeaders} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") install(TARGETS ${library_targets} EXPORT zstdExports From 7d970bd83c2323c5e78b4f15ae850373c70f055d Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 18 Mar 2024 10:55:53 -0400 Subject: [PATCH 268/283] Implement one-shot fallback for magicless format (#3971) --- lib/decompress/zstd_decompress.c | 22 ++++++++++++-------- tests/zstreamtest.c | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index f6579743859..ee2cda3b639 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -729,17 +729,17 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) return frameSizeInfo; } -static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format) { ZSTD_frameSizeInfo frameSizeInfo; ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) + if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameSizeInfoLegacy(src, srcSize); #endif - if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); assert(ZSTD_isError(frameSizeInfo.compressedSize) || @@ -753,7 +753,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize ZSTD_frameHeader zfh; /* Extract Frame Header */ - { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format); if (ZSTD_isError(ret)) return 
ZSTD_errorFrameSizeInfo(ret); if (ret > 0) @@ -796,13 +796,17 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize } } +static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format); + return frameSizeInfo.compressedSize; +} + /** ZSTD_findFrameCompressedSize() : * See docs in zstd.h * Note: compatible with legacy mode */ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); - return frameSizeInfo.compressedSize; + return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1); } /** ZSTD_decompressBound() : @@ -816,7 +820,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) unsigned long long bound = 0; /* Iterate over each frame */ while (srcSize > 0) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); size_t const compressedSize = frameSizeInfo.compressedSize; unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) @@ -836,7 +840,7 @@ size_t ZSTD_decompressionMargin(void const* src, size_t srcSize) /* Iterate over each frame */ while (srcSize > 0) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); size_t const compressedSize = frameSizeInfo.compressedSize; unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; ZSTD_frameHeader zfh; @@ -2178,7 +2182,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN && 
zds->fParams.frameType != ZSTD_skippableFrame && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); + size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format); if (cSize <= (size_t)(iend-istart)) { /* shortcut : using single-pass mode */ size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 7cc4068bc09..e0ee4c3e934 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -2417,6 +2417,41 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : Test single-shot fallback for magicless mode: ", testNb++); + { + // Aquire resources + size_t const srcSize = COMPRESSIBLE_NOISE_LENGTH; + void* src = malloc(srcSize); + size_t const dstSize = ZSTD_compressBound(srcSize); + void* dst = malloc(dstSize); + size_t const valSize = srcSize; + void* val = malloc(valSize); + ZSTD_inBuffer inBuf = { dst, dstSize, 0 }; + ZSTD_outBuffer outBuf = { val, valSize, 0 }; + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + CHECK(!src || !dst || !val || !dctx || !cctx, "memory allocation failure"); + + // Write test data for decompression to dst + RDG_genBuffer(src, srcSize, compressibility, 0.0, 0xdeadbeef); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless)); + CHECK_Z(ZSTD_compress2(cctx, dst, dstSize, src, srcSize)); + + // Run decompression + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless)); + CHECK_Z(ZSTD_decompressStream(dctx, &outBuf, &inBuf)); + + // Validate + CHECK(outBuf.pos != srcSize, "decompressed size must match"); + CHECK(memcmp(src, val, srcSize) != 0, "decompressed data must match"); + + // Cleanup + free(src); free(dst); free(val); + 
ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + _end: FUZ_freeDictionary(dictionary); ZSTD_freeCStream(zc); From cd4dba74dea8a92f9e33d72fcb5b60224bc4e6c3 Mon Sep 17 00:00:00 2001 From: Alexander Kanavin Date: Mon, 19 Jun 2023 17:10:09 +0200 Subject: [PATCH 269/283] pzstd: use c++14 without conditions Doing this check with a direct c++ snippet is prone to portability problems: - \043 is not portable between shells: dash expands it to #, bash does not; - using # directly works with make 4.3 but does not with make 4.2. Let's just use the c++ version that covers both the code and the gtest. --- contrib/pzstd/Makefile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile index 9604eb2438c..e4b3e8a21f2 100644 --- a/contrib/pzstd/Makefile +++ b/contrib/pzstd/Makefile @@ -37,11 +37,8 @@ CFLAGS += -Wno-deprecated-declarations PZSTD_INC = -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(PROGDIR) -I. GTEST_INC = -isystem googletest/googletest/include -# If default C++ version is older than C++11, explicitly set C++11, which is the -# minimum required by the code. -ifeq ($(shell echo "\043if __cplusplus < 201103L\n\043error\n\043endif" | $(CXX) -x c++ -Werror -c - -o /dev/null 2>/dev/null && echo 1 || echo 0),0) -PZSTD_CXX_STD := -std=c++11 -endif +# Set the minimum required by gtest +PZSTD_CXX_STD := -std=c++14 PZSTD_CPPFLAGS = $(PZSTD_INC) PZSTD_CCXXFLAGS = From 42b02f5185393e5f71abaa4c532684de3569be85 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 18 Mar 2024 09:28:06 -0700 Subject: [PATCH 270/283] [cmake] Emit warnings for contradictory build settings Document that the `ZSTD_BUILD_{SHARED,STATIC}` take precedence over `BUILD_SHARED_LIBS` when exactly one is ON. Thanks to @teo-tsirpanis for pointing out the potentially confusing behavior. 
--- build/cmake/lib/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index eb21b8b3259..5d514ccb2e9 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -152,11 +152,17 @@ if (ZSTD_BUILD_STATIC) add_definition(libzstd_static ZDICTLIB_STATIC_API) endif () if (ZSTD_BUILD_SHARED AND NOT ZSTD_BUILD_STATIC) + if (NOT BUILD_SHARED_LIBS) + message(WARNING "BUILD_SHARED_LIBS is OFF, but ZSTD_BUILD_SHARED is ON and ZSTD_BUILD_STATIC is OFF, which takes precedence, so libzstd is a shared library") + endif () add_library(libzstd INTERFACE) target_link_libraries(libzstd INTERFACE libzstd_shared) list(APPEND library_targets libzstd) endif () if (ZSTD_BUILD_STATIC AND NOT ZSTD_BUILD_SHARED) + if (BUILD_SHARED_LIBS) + message(WARNING "BUILD_SHARED_LIBS is ON, but ZSTD_BUILD_SHARED is OFF and ZSTD_BUILD_STATIC is ON, which takes precedence, is set so libzstd is a static library") + endif () add_library(libzstd INTERFACE) target_link_libraries(libzstd INTERFACE libzstd_static) list(APPEND library_targets libzstd) From f5728da365e14a715a131434847f732ee84d8719 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:04:02 -0700 Subject: [PATCH 271/283] update targetCBlockSize documentation --- lib/compress/zstd_compress_superblock.c | 4 +-- lib/zstd.h | 38 ++++++++++++++----------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 8c466c47952..628a2dccd09 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -469,8 +469,6 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs, return n; } -#define CBLOCK_TARGET_SIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ - /** ZSTD_compressSubBlock_multi() : * Breaks super-block into multiple sub-blocks and compresses them. 
* Entropy will be written into the first block. @@ -504,7 +502,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, const BYTE* llCodePtr = seqStorePtr->llCode; const BYTE* mlCodePtr = seqStorePtr->mlCode; const BYTE* ofCodePtr = seqStorePtr->ofCode; - size_t const minTarget = CBLOCK_TARGET_SIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ + size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); int writeSeqEntropy = 1; diff --git a/lib/zstd.h b/lib/zstd.h index ba611656ff6..115d7f2acca 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -262,9 +262,9 @@ ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer * /*! ZSTD_compressCCtx() : * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . + * Important : in order to mirror `ZSTD_compress()` behavior, + * this function compresses at the requested compression level, + * __ignoring any other advanced parameter__ . * If any advanced parameter was set using the advanced API, * they will all be reset. Only `compressionLevel` remains. */ @@ -286,7 +286,7 @@ ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer * /*! ZSTD_decompressDCtx() : * Same as ZSTD_decompress(), * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. + * Compatible with sticky parameters (see below). */ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, @@ -302,12 +302,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, * using ZSTD_CCtx_set*() functions. 
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ . * * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * * This API supersedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. + * In the future, we expect to remove API entry points from experimental which are redundant with this API. */ @@ -390,16 +390,19 @@ typedef enum { * The higher the value of selected strategy, the more complex it is, * resulting in stronger and slower compression. * Special: value 0 means "use default strategy". */ - ZSTD_c_targetCBlockSize=130, /* Tries to fit compressed block size to be - * around targetCBlockSize. No target when - * targetCBlockSize == 0. There is no guarantee - * on compressed block size (default:0). - * Since the decoder has to buffer a complete - * block to begin decoding it, in low band- - * width streaming environments this may - * improve end-to-end latency. Bound by - * ZSTD_TARGETCBLOCKSIZE_MIN and - * ZSTD_TARGETCBLOCKSIZE_MAX. */ + + ZSTD_c_targetCBlockSize=130, /* v1.5.6+ + * Attempts to fit compressed block size into approximatively targetCBlockSize. + * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX. + * Note that it's not a guarantee, just a convergence target (default:0). + * No target when targetCBlockSize == 0. + * This is helpful in low bandwidth streaming environments to improve end-to-end latency, + * when a client can make use of partial documents (a prominent example being Chrome). + * Note: this parameter is stable since v1.5.6. 
+ * It was present as an experimental parameter in earlier versions, + * but we don't recomment using it with earlier library versions + * due to massive performance regressions. + */ /* LDM mode parameters */ ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. * This parameter is designed to improve compression ratio @@ -584,6 +587,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); /*! ZSTD_compress2() : * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * (note that this entry point doesn't even expose a compression level parameter). * ZSTD_compress2() always starts a new frame. * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() @@ -1250,7 +1254,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) /* Advanced parameter bounds */ -#define ZSTD_TARGETCBLOCKSIZE_MIN 64 +#define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX #define ZSTD_SRCSIZEHINT_MIN 0 #define ZSTD_SRCSIZEHINT_MAX INT_MAX From 5d82c2b57c0f5f239ba712a7e6ec46c84a6ba02d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:17:41 -0700 Subject: [PATCH 272/283] add a paragraph on UB DCtx state after error --- lib/zstd.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/zstd.h b/lib/zstd.h index 115d7f2acca..d27e593179b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -904,6 +904,12 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); * @return : 0 when a frame is completely decoded and fully flushed, * or an error code, which can be tested using ZSTD_isError(), * or any other value > 0, which means there is some decoding or flushing to do to 
complete current frame. + * + * Note: when an operation returns with an error code, the @zds state if left in undefined state. + * It's UB to invoke `ZSTD_decompressStream()` on such a state. + * In order to re-use such a state, it must be reset first, + * which can be done explicitly (`ZSTD_DCtx_reset()`), + * or is sometimes implied (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) */ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); From 902c7ec1fe833f7f8d542fe94acba9e3a0a013a1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:30:35 -0700 Subject: [PATCH 273/283] add doc on CCtx UB state --- lib/zstd.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index d27e593179b..b13e7e95998 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -791,6 +791,11 @@ typedef enum { * only ZSTD_e_end or ZSTD_e_flush operations are allowed. * Before starting a new compression job, or changing compression parameters, * it is required to fully flush internal buffers. + * - note: if an operation ends with an error, it may leave @cctx in an undefined state. + * Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state. + * In order to be re-employed after an error, a state must be reset, + * which can be done explicitly (ZSTD_CCtx_reset()), + * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx()) */ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_outBuffer* output, @@ -905,11 +910,11 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); * or an error code, which can be tested using ZSTD_isError(), * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. * - * Note: when an operation returns with an error code, the @zds state if left in undefined state. 
+ * Note: when an operation returns with an error code, the @zds state may be left in undefined state. * It's UB to invoke `ZSTD_decompressStream()` on such a state. - * In order to re-use such a state, it must be reset first, + * In order to re-use such a state, it must be first reset, * which can be done explicitly (`ZSTD_DCtx_reset()`), - * or is sometimes implied (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) + * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) */ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); From 3d18d9a9ce5fd5a03c6389b17ee464cf2cf60e94 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:30:54 -0700 Subject: [PATCH 274/283] updated API manual --- doc/zstd_manual.html | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 58fe958dc0b..48db40e6ba2 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -190,9 +190,9 @@

    Compression context

      When compressing many times,
                        const void* src, size_t srcSize,
                              int compressionLevel);
     

    Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - Important : in order to behave similarly to `ZSTD_compress()`, - this function compresses at requested compression level, - __ignoring any other parameter__ . + Important : in order to mirror `ZSTD_compress()` behavior, + this function compresses at the requested compression level, + __ignoring any other advanced parameter__ . If any advanced parameter was set using the advanced API, they will all be reset. Only `compressionLevel` remains. @@ -212,7 +212,7 @@

    Decompression context

      When decompressing many times,
                          const void* src, size_t srcSize);
     

    Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx. - Compatible with sticky parameters. + Compatible with sticky parameters (see below).


    @@ -296,6 +296,19 @@

    Decompression context

      When decompressing many times,
                                   * The higher the value of selected strategy, the more complex it is,
                                   * resulting in stronger and slower compression.
                                   * Special: value 0 means "use default strategy". */
    +
    +    ZSTD_c_targetCBlockSize=130, /* v1.5.6+
    +                                  * Attempts to fit compressed block size into approximatively targetCBlockSize.
    +                                  * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
    +                                  * Note that it's not a guarantee, just a convergence target (default:0).
    +                                  * No target when targetCBlockSize == 0.
    +                                  * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
    +                                  * when a client can make use of partial documents (a prominent example being Chrome).
    +                                  * Note: this parameter is stable since v1.5.6.
    +                                  * It was present as an experimental parameter in earlier versions,
    +                                  * but we don't recomment using it with earlier library versions
    +                                  * due to massive performance regressions.
    +                                  */
         /* LDM mode parameters */
         ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
                                          * This parameter is designed to improve compression ratio
    @@ -375,7 +388,6 @@ 

    Decompression context

      When decompressing many times,
          * ZSTD_c_forceMaxWindow
          * ZSTD_c_forceAttachDict
          * ZSTD_c_literalCompressionMode
    -     * ZSTD_c_targetCBlockSize
          * ZSTD_c_srcSizeHint
          * ZSTD_c_enableDedicatedDictSearch
          * ZSTD_c_stableInBuffer
    @@ -396,7 +408,7 @@ 

    Decompression context

      When decompressing many times,
          ZSTD_c_experimentalParam3=1000,
          ZSTD_c_experimentalParam4=1001,
          ZSTD_c_experimentalParam5=1002,
    -     ZSTD_c_experimentalParam6=1003,
    +     /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
          ZSTD_c_experimentalParam7=1004,
          ZSTD_c_experimentalParam8=1005,
          ZSTD_c_experimentalParam9=1006,
    @@ -483,6 +495,7 @@ 

    Decompression context

      When decompressing many times,
                            void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize);
     

    Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + (note that this entry point doesn't even expose a compression level parameter). ZSTD_compress2() always starts a new frame. Should cctx hold data from a previously unfinished frame, everything about it is forgotten. - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() @@ -668,6 +681,11 @@

    Streaming compression functions

    typedef enum {
                 only ZSTD_e_end or ZSTD_e_flush operations are allowed.
                 Before starting a new compression job, or changing compression parameters,
                 it is required to fully flush internal buffers.
    +  - note: if an operation ends with an error, it may leave @cctx in an undefined state.
    +          Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state.
    +          In order to be re-employed after an error, a state must be reset,
    +          which can be done explicitly (ZSTD_CCtx_reset()),
    +          or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
      
     


    @@ -753,6 +771,12 @@

    Streaming decompression functions


    @return : 0 when a frame is completely decoded and fully flushed, or an error code, which can be tested using ZSTD_isError(), or any other value > 0, which means there is some decoding or flushing to do to complete current frame. + + Note: when an operation returns with an error code, the @zds state may be left in undefined state. + It's UB to invoke `ZSTD_decompressStream()` on such a state. + In order to re-use such a state, it must be first reset, + which can be done explicitly (`ZSTD_DCtx_reset()`), + or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)


    From c5da438dc0ca81ce697a73a02b060e3ba7550bab Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 18 Mar 2024 12:33:22 -0700 Subject: [PATCH 275/283] fix typo --- doc/zstd_manual.html | 2 +- lib/zstd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 48db40e6ba2..bc4a2403648 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -306,7 +306,7 @@

    Decompression context

      When decompressing many times,
                                       * when a client can make use of partial documents (a prominent example being Chrome).
                                       * Note: this parameter is stable since v1.5.6.
                                       * It was present as an experimental parameter in earlier versions,
    -                                  * but we don't recomment using it with earlier library versions
    +                                  * but it's not recommended using it with earlier library versions
                                       * due to massive performance regressions.
                                       */
         /* LDM mode parameters */
    diff --git a/lib/zstd.h b/lib/zstd.h
    index b13e7e95998..b110874b708 100644
    --- a/lib/zstd.h
    +++ b/lib/zstd.h
    @@ -400,7 +400,7 @@ typedef enum {
                                       * when a client can make use of partial documents (a prominent example being Chrome).
                                       * Note: this parameter is stable since v1.5.6.
                                       * It was present as an experimental parameter in earlier versions,
    -                                  * but we don't recomment using it with earlier library versions
    +                                  * but it's not recommended using it with earlier library versions
                                       * due to massive performance regressions.
                                       */
         /* LDM mode parameters */
    
    From 6f1215b874dbf74b50dcb64915e91e11ba198008 Mon Sep 17 00:00:00 2001
    From: Yann Collet 
    Date: Mon, 18 Mar 2024 14:10:08 -0700
    Subject: [PATCH 276/283] fix ZSTD_TARGETCBLOCKSIZE_MIN test
    
    when requested CBlockSize is too low,
    bound it to the minimum
    instead of returning an error.
    ---
     lib/compress/zstd_compress.c | 16 +++++++++-------
     1 file changed, 9 insertions(+), 7 deletions(-)
    
    diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
    index 451f2f91e6f..38c91473ef9 100644
    --- a/lib/compress/zstd_compress.c
    +++ b/lib/compress/zstd_compress.c
    @@ -870,7 +870,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     #else
             FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
             CCtxParams->nbWorkers = value;
    -        return CCtxParams->nbWorkers;
    +        return (size_t)(CCtxParams->nbWorkers);
     #endif
     
         case ZSTD_c_jobSize :
    @@ -894,7 +894,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     #else
             FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
             CCtxParams->overlapLog = value;
    -        return CCtxParams->overlapLog;
    +        return (size_t)CCtxParams->overlapLog;
     #endif
     
         case ZSTD_c_rsyncable :
    @@ -904,7 +904,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     #else
             FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
             CCtxParams->rsyncable = value;
    -        return CCtxParams->rsyncable;
    +        return (size_t)CCtxParams->rsyncable;
     #endif
     
         case ZSTD_c_enableDedicatedDictSearch :
    @@ -941,8 +941,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
             return CCtxParams->ldmParams.hashRateLog;
     
         case ZSTD_c_targetCBlockSize :
    -        if (value!=0)   /* 0 ==> default */
    +        if (value!=0) {  /* 0 ==> default */
    +            value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
                 BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
    +        }
             CCtxParams->targetCBlockSize = (U32)value;
             return CCtxParams->targetCBlockSize;
     
    @@ -970,7 +972,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         case ZSTD_c_validateSequences:
             BOUNDCHECK(ZSTD_c_validateSequences, value);
             CCtxParams->validateSequences = value;
    -        return CCtxParams->validateSequences;
    +        return (size_t)CCtxParams->validateSequences;
     
         case ZSTD_c_useBlockSplitter:
             BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
    @@ -985,7 +987,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         case ZSTD_c_deterministicRefPrefix:
             BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
             CCtxParams->deterministicRefPrefix = !!value;
    -        return CCtxParams->deterministicRefPrefix;
    +        return (size_t)CCtxParams->deterministicRefPrefix;
     
         case ZSTD_c_prefetchCDictTables:
             BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
    @@ -995,7 +997,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         case ZSTD_c_enableSeqProducerFallback:
             BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
             CCtxParams->enableMatchFinderFallback = value;
    -        return CCtxParams->enableMatchFinderFallback;
    +        return (size_t)CCtxParams->enableMatchFinderFallback;
     
         case ZSTD_c_maxBlockSize:
             if (value!=0)    /* 0 ==> default */
    
    From 6a0052a409e2604bd40354b76b86272b712edd7d Mon Sep 17 00:00:00 2001
    From: Yonatan Komornik <11005061+yoniko@users.noreply.github.com>
    Date: Mon, 18 Mar 2024 15:36:40 -0700
    Subject: [PATCH 277/283] Fix bugs in simple decompression fuzzer (#3978)
    
    Fixes 2 issues in `simple_decompress.c`:
    1. Wrong type used for storing the results of `ZSTD_findDecompressedSize` resulting in never matching to `ZSTD_CONTENTSIZE_ERROR` or `ZSTD_CONTENTSIZE_UNKNOWN`.
    
    2. Experimental API is used (`ZSTD_findDecompressedSize`) without defining `ZSTD_STATIC_LINKING_ONLY`.
    ---
     tests/fuzz/simple_decompress.c | 5 ++++-
     1 file changed, 4 insertions(+), 1 deletion(-)
    
    diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c
    index 0ee61902cdb..ab4697ff1fc 100644
    --- a/tests/fuzz/simple_decompress.c
    +++ b/tests/fuzz/simple_decompress.c
    @@ -16,6 +16,9 @@
     #include 
     #include 
     #include 
    +
    +#define ZSTD_STATIC_LINKING_ONLY
    +
     #include "fuzz_helpers.h"
     #include "zstd.h"
     #include "fuzz_data_producer.h"
    @@ -40,7 +43,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
         size_t const dSize = ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
         if (!ZSTD_isError(dSize)) {
             /* If decompression was successful, the content size from the frame header(s) should be valid. */
    -        size_t const expectedSize = ZSTD_findDecompressedSize(src, size);
    +        unsigned long long const expectedSize = ZSTD_findDecompressedSize(src, size);
             FUZZ_ASSERT(expectedSize != ZSTD_CONTENTSIZE_ERROR);
             FUZZ_ASSERT(expectedSize == ZSTD_CONTENTSIZE_UNKNOWN || expectedSize == dSize);
         }
    
    From 3487a60950ea01e89883a3e807a18a6e155768b7 Mon Sep 17 00:00:00 2001
    From: Yonatan Komornik 
    Date: Mon, 18 Mar 2024 15:25:22 -0700
    Subject: [PATCH 278/283] Fail on errors when building fuzzers
    
    Fails on errors when building fuzzers with `fuzz.py` (adds `Werror`).
    Currently allows `declaration-after-statement`, `c++-compat` and
    `deprecated` as they are abundant in code (some fixes to
    `declaration-after-statement` are presented in this commit).
    ---
     tests/fuzz/fuzz.py              |  7 ++++++-
     tests/fuzz/fuzz_data_producer.c | 10 +++++-----
     tests/fuzz/regression_driver.c  |  3 ++-
     tests/fuzz/simple_decompress.c  | 21 +++++++++++----------
     tests/fuzz/stream_round_trip.c  |  2 +-
     5 files changed, 25 insertions(+), 18 deletions(-)
    
    diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
    index c489b8fa646..7e1f3c9df07 100755
    --- a/tests/fuzz/fuzz.py
    +++ b/tests/fuzz/fuzz.py
    @@ -405,7 +405,12 @@ def build(args):
         cxxflags = shlex.split(args.cxxflags)
         mflags = shlex.split(args.mflags)
         # Flags to be added to both cflags and cxxflags
    -    common_flags = []
    +    common_flags = [
    +        '-Werror',
    +        '-Wno-error=declaration-after-statement',
    +        '-Wno-error=c++-compat',
    +        '-Wno-error=deprecated' # C files are sometimes compiled with CXX
    +    ]
     
         cppflags += [
             '-DDEBUGLEVEL={}'.format(args.debug),
    diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c
    index bf846b68f72..056de3ee950 100644
    --- a/tests/fuzz/fuzz_data_producer.c
    +++ b/tests/fuzz/fuzz_data_producer.c
    @@ -28,12 +28,12 @@ void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); }
     
     uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min,
                                       uint32_t max) {
    -    FUZZ_ASSERT(min <= max);
    -
         uint32_t range = max - min;
         uint32_t rolling = range;
         uint32_t result = 0;
     
    +    FUZZ_ASSERT(min <= max);
    +
         while (rolling > 0 && producer->size > 0) {
           uint8_t next = *(producer->data + producer->size - 1);
           producer->size -= 1;
    @@ -79,11 +79,11 @@ int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer) {
     
     size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize)
     {
    -    newSize = newSize > producer->size ? producer->size : newSize;
    +    const size_t effectiveNewSize = newSize > producer->size ? producer->size : newSize;
     
    -    size_t remaining = producer->size - newSize;
    +    size_t remaining = producer->size - effectiveNewSize;
         producer->data = producer->data + remaining;
    -    producer->size = newSize;
    +    producer->size = effectiveNewSize;
         return remaining;
     }
     
    diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c
    index 550c65d8600..26e2b6af4f9 100644
    --- a/tests/fuzz/regression_driver.c
    +++ b/tests/fuzz/regression_driver.c
    @@ -44,11 +44,12 @@ int main(int argc, char const **argv) {
         fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]);
       for (i = 0; i < files->tableSize; ++i) {
         char const *fileName = files->fileNames[i];
    -    DEBUGLOG(3, "Running %s", fileName);
         size_t const fileSize = UTIL_getFileSize(fileName);
         size_t readSize;
         FILE *file;
     
    +    DEBUGLOG(3, "Running %s", fileName);
    +
         /* Check that it is a regular file, and that the fileSize is valid.
          * If it is not a regular file, then it may have been deleted since we
          * constructed the list, so just skip it, but return an error exit code.
    diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c
    index ab4697ff1fc..0dc9e5b7c5b 100644
    --- a/tests/fuzz/simple_decompress.c
    +++ b/tests/fuzz/simple_decompress.c
    @@ -37,17 +37,18 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
             FUZZ_ASSERT(dctx);
         }
     
    -    size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
    -    void *rBuf = FUZZ_malloc(bufSize);
    -
    -    size_t const dSize = ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
    -    if (!ZSTD_isError(dSize)) {
    -        /* If decompression was successful, the content size from the frame header(s) should be valid. */
    -        unsigned long long const expectedSize = ZSTD_findDecompressedSize(src, size);
    -        FUZZ_ASSERT(expectedSize != ZSTD_CONTENTSIZE_ERROR);
    -        FUZZ_ASSERT(expectedSize == ZSTD_CONTENTSIZE_UNKNOWN || expectedSize == dSize);
    +    {
    +        size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
    +        void *rBuf = FUZZ_malloc(bufSize);
    +        size_t const dSize = ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
    +        if (!ZSTD_isError(dSize)) {
    +            /* If decompression was successful, the content size from the frame header(s) should be valid. */
    +            unsigned long long const expectedSize = ZSTD_findDecompressedSize(src, size);
    +            FUZZ_ASSERT(expectedSize != ZSTD_CONTENTSIZE_ERROR);
    +            FUZZ_ASSERT(expectedSize == ZSTD_CONTENTSIZE_UNKNOWN || expectedSize == dSize);
    +        }
    +        free(rBuf);
         }
    -    free(rBuf);
     
         FUZZ_dataProducer_free(producer);
     
    diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c
    index c2d6707a1ce..6e340c81d2d 100644
    --- a/tests/fuzz/stream_round_trip.c
    +++ b/tests/fuzz/stream_round_trip.c
    @@ -136,7 +136,7 @@ static size_t compress(uint8_t *dst, size_t capacity,
         return dstSize;
     }
     
    -size_t decompress(void* dst, size_t dstCapacity, void const* src, size_t srcSize, FUZZ_dataProducer_t* producer)
    +static size_t decompress(void* dst, size_t dstCapacity, void const* src, size_t srcSize, FUZZ_dataProducer_t* producer)
     {
         ZSTD_inBuffer in = {src, srcSize, 0};
         ZSTD_outBuffer out = {dst, dstCapacity, 0};
    
    From f62b2663b96d440d3b9dd50b40dc911f9e0083d3 Mon Sep 17 00:00:00 2001
    From: Elliot Gorokhovsky 
    Date: Tue, 19 Mar 2024 14:05:23 -0700
    Subject: [PATCH 279/283] Add docs on how to add a new fuzzer
    
    ---
     tests/fuzz/README.md | 42 ++++++++++++++++++++++++++++++++++++++++++
     1 file changed, 42 insertions(+)
    
    diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md
    index 2a9bd4570b6..e2196e832c6 100644
    --- a/tests/fuzz/README.md
    +++ b/tests/fuzz/README.md
    @@ -117,3 +117,45 @@ CC=clang CXX=clang++ ./fuzz.py build all --enable-msan
     ## Fuzzing a custom sequence producer plugin
     Sequence producer plugin authors can use the zstd fuzzers to stress-test their code.
     See the documentation in `fuzz_third_party_seq_prod.h` for details.
    +
    +## Adding a new fuzzer
    +There are several steps involved in adding a new fuzzer harness.
    +
    +### Build your harness
    +1. Create your new fuzzer harness `tests/fuzz/your_harness.c`.
    +
    +2. Add your harness to the Makefile
    +
    +    2.1 Follow [this example](https://github.com/facebook/zstd/blob/e124e39301381de8f323436a3e4c46539747ba24/tests/fuzz/Makefile#L216) if your fuzzer requires both compression and decompression symbols (prefix `rt_`). If your fuzzer only requires decompression symbols, follow [this example](https://github.com/facebook/zstd/blob/6a0052a409e2604bd40354b76b86272b712edd7d/tests/fuzz/Makefile#L194) (prefix `d_`).
    +    
    +    2.2 Add your target to [`FUZZ_TARGETS`](https://github.com/facebook/zstd/blob/6a0052a409e2604bd40354b76b86272b712edd7d/tests/fuzz/Makefile#L108).
    +    
    +3. Add your harness to [`fuzz.py`](https://github.com/facebook/zstd/blob/6a0052a409e2604bd40354b76b86272b712edd7d/tests/fuzz/fuzz.py#L48).
    +
    +### Generate seed data
    +Follow the instructions above to generate seed data:
    +```
    +make -C ../tests decodecorpus
    +./fuzz.py gen your_harness
    +```
    +
    +### Run the harness
    +Follow the instructions above to run your harness and fix any crashes:
    +```
    +./fuzz.py build your_harness --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++
    +./fuzz.py libfuzzer your_harness
    +```
    +
    +### Minimize and zip the corpus
    +After running the fuzzer for a while, you will have a large corpus at `tests/fuzz/corpora/your_harness*`.
    +This corpus must be minimized and zipped before uploading to GitHub for regression testing:
    +```
    +./fuzz.py minimize your_harness
    +./fuzz.py zip your_harness 
    +```
    +
    +### Upload the zip file to GitHub
    +The previous step should produce a `.zip` file containing the corpus for your new harness.
    +This corpus must be uploaded to GitHub here: https://github.com/facebook/zstd/releases/tag/fuzz-corpora
    +
    +
    
    From 741b87bbe1c7c7e7292742f3b1ed9c4055c4743c Mon Sep 17 00:00:00 2001
    From: Elliot Gorokhovsky 
    Date: Wed, 20 Mar 2024 19:22:34 -0400
    Subject: [PATCH 280/283] Fuzzing and bugfixes for magicless-format decoding
     (#3976)
    
    * fuzzing and bugfixes for magicless format
    
    * reset dctx before each decompression
    
    * do not memcmp empty buffers
    
    * nit: decompressor errata
    ---
     CHANGELOG                            |   1 +
     doc/decompressor_errata.md           |  21 +++++
     lib/decompress/zstd_decompress.c     |   8 +-
     tests/fuzz/Makefile                  |   6 +-
     tests/fuzz/decompress_cross_format.c | 130 +++++++++++++++++++++++++++
     tests/fuzz/fuzz.py                   |   1 +
     6 files changed, 163 insertions(+), 4 deletions(-)
     create mode 100644 tests/fuzz/decompress_cross_format.c
    
    diff --git a/CHANGELOG b/CHANGELOG
    index 23c0128203f..afb80ed9ea7 100644
    --- a/CHANGELOG
    +++ b/CHANGELOG
    @@ -9,6 +9,7 @@ lib: accept dictionaries with partial literal tables, by @terrelln
     lib: fix CCtx size estimation with external sequence producer, by @embg
     lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz
     lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt
    +lib: fix several bugs in magicless-format decoding, by @embg
     cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit
     cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973
     cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte
    diff --git a/doc/decompressor_errata.md b/doc/decompressor_errata.md
    index 83d4071cb4d..b570f73145d 100644
    --- a/doc/decompressor_errata.md
    +++ b/doc/decompressor_errata.md
    @@ -125,3 +125,24 @@ The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`.
     See the compressor workaround code:
     
     https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682
    +
    +Magicless format
    +----------------------
    +
    +**Last affected version**: v1.5.5
    +
    +**Affected decompressor component(s)**: Library
    +
    +**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232)
    +
    +**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59`
    +
    +v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames.
    +These include but are not limited to:
    +* Valid frames that happen to begin with a legacy magic number (little-endian)
    +* Valid frames that happen to begin with a skippable magic number (little-endian)
    +
    +If you are affected by this issue and cannot update to v1.5.6 or later, there is a
    +workaround to recover affected data. Simply prepend the ZSTD magic number
    +`0xFD2FB528` (little-endian) to your data and decompress using the standard-format
    +decoder.
    diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
    index ee2cda3b639..2f03cf7b0c7 100644
    --- a/lib/decompress/zstd_decompress.c
    +++ b/lib/decompress/zstd_decompress.c
    @@ -1085,7 +1085,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
         while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
     
     #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
    -        if (ZSTD_isLegacy(src, srcSize)) {
    +        if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
                 size_t decodedSize;
                 size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
                 if (ZSTD_isError(frameSize)) return frameSize;
    @@ -1115,7 +1115,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
             }
     #endif
     
    -        if (srcSize >= 4) {
    +        if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
                 U32 const magicNumber = MEM_readLE32(src);
                 DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
                 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
    @@ -1412,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
         case ZSTDds_decodeSkippableHeader:
             assert(src != NULL);
             assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
    +        assert(dctx->format != ZSTD_f_zstd1_magicless);
             ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
             dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
             dctx->stage = ZSTDds_skipFrame;
    @@ -2209,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                 DEBUGLOG(4, "Consume header");
                 FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
     
    -            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
    +            if (zds->format == ZSTD_f_zstd1
    +                && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
                     zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
                     zds->stage = ZSTDds_skipFrame;
                 } else {
    diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
    index f96adcfaea7..2f5a25fb114 100644
    --- a/tests/fuzz/Makefile
    +++ b/tests/fuzz/Makefile
    @@ -124,7 +124,8 @@ FUZZ_TARGETS :=       \
     	sequence_compression_api \
     	seekable_roundtrip \
     	huf_round_trip \
    -	huf_decompress
    +	huf_decompress \
    +	decompress_cross_format
     
     all: libregression.a $(FUZZ_TARGETS)
     
    @@ -238,6 +239,9 @@ huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
     
     huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o
     	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@
    +	
    +decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o
    +	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@
     
     libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
     	$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
    diff --git a/tests/fuzz/decompress_cross_format.c b/tests/fuzz/decompress_cross_format.c
    new file mode 100644
    index 00000000000..78461e697b1
    --- /dev/null
    +++ b/tests/fuzz/decompress_cross_format.c
    @@ -0,0 +1,130 @@
    +/*
    + * Copyright (c) Meta Platforms, Inc. and affiliates.
    + * All rights reserved.
    + *
    + * This source code is licensed under both the BSD-style license (found in the
    + * LICENSE file in the root directory of this source tree) and the GPLv2 (found
    + * in the COPYING file in the root directory of this source tree).
    + * You may select, at your option, one of the above-listed licenses.
    + */
    +
    +// This fuzz target validates decompression of magicless-format compressed data.
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include "fuzz_helpers.h"
    +#define ZSTD_STATIC_LINKING_ONLY
    +#include "zstd.h"
    +#include "fuzz_data_producer.h"
    +
    +static ZSTD_DCtx *dctx = NULL;
    +
    +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
    +{
    +    // Give a random portion of src data to the producer, to use for parameter generation.
    +    // The rest will be interpreted as magicless compressed data.
    +    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
    +    size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer);
    +    const void* const magiclessSrc = src;
    +    size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
    +    void* const standardDst = FUZZ_malloc(dstSize);
    +    void* const magiclessDst = FUZZ_malloc(dstSize);
    +
    +    // Create standard-format src from magicless-format src
    +    const uint32_t zstd_magic = ZSTD_MAGICNUMBER;
    +    size_t standardSize = sizeof(zstd_magic) + magiclessSize;
    +    void* const standardSrc = FUZZ_malloc(standardSize);
    +    memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine
    +    memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize);
    +
    +    // Truncate to a single frame
    +    {
    +        const size_t standardFrameCompressedSize = ZSTD_findFrameCompressedSize(standardSrc, standardSize);
    +        if (ZSTD_isError(standardFrameCompressedSize)) {
    +            goto cleanup_and_return;
    +        }
    +        standardSize = standardFrameCompressedSize;
    +        magiclessSize = standardFrameCompressedSize - sizeof(zstd_magic);
    +    }
    +
    +    // Create DCtx if needed
    +    if (!dctx) {
    +        dctx = ZSTD_createDCtx();
    +        FUZZ_ASSERT(dctx);
    +    }
    +
    +    // Test one-shot decompression
    +    {
    +        FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
    +        FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
    +        const size_t standardRet = ZSTD_decompressDCtx(
    +                                        dctx, standardDst, dstSize, standardSrc, standardSize);
    +
    +        FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
    +        FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
    +        const size_t magiclessRet = ZSTD_decompressDCtx(
    +                                        dctx, magiclessDst, dstSize, magiclessSrc, magiclessSize);
    +
    +        // Standard accepts => magicless should accept
    +        if (!ZSTD_isError(standardRet)) FUZZ_ZASSERT(magiclessRet);
    +
    +        // Magicless accepts => standard should accept
    +        // NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
    +        if (!ZSTD_isError(magiclessRet)) FUZZ_ZASSERT(standardRet);
    +
    +        // If both accept, decompressed size and data should match
    +        if (!ZSTD_isError(standardRet) && !ZSTD_isError(magiclessRet)) {
    +            FUZZ_ASSERT(standardRet == magiclessRet);
    +            if (standardRet > 0) {
    +                FUZZ_ASSERT(
    +                    memcmp(standardDst, magiclessDst, standardRet) == 0
    +                );
    +            }
    +        }
    +    }
    +
    +    // Test streaming decompression
    +    {
    +        ZSTD_inBuffer standardIn = { standardSrc, standardSize, 0 };
    +        ZSTD_inBuffer magiclessIn = { magiclessSrc, magiclessSize, 0 };
    +        ZSTD_outBuffer standardOut = { standardDst, dstSize, 0 };
    +        ZSTD_outBuffer magiclessOut = { magiclessDst, dstSize, 0 };
    +
    +        FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
    +        FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
    +        const size_t standardRet = ZSTD_decompressStream(dctx, &standardOut, &standardIn);
    +
    +        FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
    +        FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
    +        const size_t magiclessRet = ZSTD_decompressStream(dctx, &magiclessOut, &magiclessIn);
    +
    +        // Standard accepts => magicless should accept
    +        if (standardRet == 0) FUZZ_ASSERT(magiclessRet == 0);
    +
    +        // Magicless accepts => standard should accept
    +        // NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
    +        if (magiclessRet == 0) FUZZ_ASSERT(standardRet == 0);
    +
    +        // If both accept, decompressed size and data should match
    +        if (standardRet == 0 && magiclessRet == 0) {
    +            FUZZ_ASSERT(standardOut.pos == magiclessOut.pos);
    +            if (standardOut.pos > 0) {
    +                FUZZ_ASSERT(
    +                    memcmp(standardOut.dst, magiclessOut.dst, standardOut.pos) == 0
    +                );
    +            }
    +        }
    +    }
    +
    +cleanup_and_return:
    +#ifndef STATEFUL_FUZZING
    +    ZSTD_freeDCtx(dctx); dctx = NULL;
    +#endif
    +    free(standardSrc);
    +    free(standardDst);
    +    free(magiclessDst);
    +    FUZZ_dataProducer_free(producer);
    +    return 0;
    +}
    diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
    index c489b8fa646..f321002a7a0 100755
    --- a/tests/fuzz/fuzz.py
    +++ b/tests/fuzz/fuzz.py
    @@ -65,6 +65,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD):
         'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
         'huf_round_trip': TargetInfo(InputType.RAW_DATA),
         'huf_decompress': TargetInfo(InputType.RAW_DATA),
    +    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
     }
     TARGETS = list(TARGET_INFO.keys())
     ALL_TARGETS = TARGETS + ['all']
    
    From 731f4b70fcd22fc9badd4e51dc6d939ee6da6c54 Mon Sep 17 00:00:00 2001
    From: Nick Terrell 
    Date: Tue, 19 Mar 2024 12:37:55 -0700
    Subject: [PATCH 281/283] Fix & fuzz ZSTD_generateSequences
    
    This function was seriously flawed:
    * It didn't do output bounds checks
    * It produced invalid sequences when an uncompressed or RLE block was emitted
    * It produced invalid sequences when the block splitter was enabled
    * It produced invalid sequences when ZSTD_c_targetCBlockSize was enabled
    
    I've attempted to fix these issues, but this function is just a bad idea,
    so I've marked it as deprecated and unsafe. We should replace it with
    `ZSTD_extractSequences()` which operates on a compressed frame.
    ---
     CHANGELOG                       |   1 +
     lib/compress/zstd_compress.c    | 127 +++++++++++++++++++++-----------
     lib/compress/zstdmt_compress.c  |   4 +-
     lib/zstd.h                      |  33 ++++++---
     tests/fuzz/Makefile             |   8 +-
     tests/fuzz/fuzz.py              |   1 +
     tests/fuzz/generate_sequences.c |  88 ++++++++++++++++++++++
     tests/fuzzer.c                  |  25 +++++++
     8 files changed, 231 insertions(+), 56 deletions(-)
     create mode 100644 tests/fuzz/generate_sequences.c
    
    diff --git a/CHANGELOG b/CHANGELOG
    index afb80ed9ea7..33f43410f03 100644
    --- a/CHANGELOG
    +++ b/CHANGELOG
    @@ -33,6 +33,7 @@ port: sparc64 support validation in CI, by @Cyan4973
     port: AIX compatibility, by @likema
     port: HP-UX compatibility, by @likema
     doc: Improved specification accuracy, by @elasota
    +bug: Fix and deprecate ZSTD_generateSequences (#3981)
     
     v1.5.5 (Apr 2023)
     fix: fix rare corruption bug affecting the high compression mode, reported by @danlark1 (#3517, @terrelln)
    diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
    index 451f2f91e6f..f6a84e6b729 100644
    --- a/lib/compress/zstd_compress.c
    +++ b/lib/compress/zstd_compress.c
    @@ -3361,29 +3361,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
         return ZSTDbss_compress;
     }
     
    -static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
    +static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM])
     {
    -    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
    -    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
    -    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
    -    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
    -    size_t literalsRead = 0;
    -    size_t lastLLSize;
    +    const seqDef* inSeqs = seqStore->sequencesStart;
    +    const size_t nbInSequences = seqStore->sequences - inSeqs;
    +    const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart);
     
    -    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    +    ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex;
    +    const size_t nbOutSequences = nbInSequences + 1;
    +    size_t nbOutLiterals = 0;
    +    repcodes_t repcodes;
         size_t i;
    -    repcodes_t updatedRepcodes;
     
    -    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    -    /* Ensure we have enough space for last literals "sequence" */
    -    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
    -    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    -    for (i = 0; i < seqStoreSeqSize; ++i) {
    -        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
    -        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
    -        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
    +    /* Bounds check that we have enough space for every input sequence
    +     * and the block delimiter
    +     */
    +    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
    +    RETURN_ERROR_IF(
    +        nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex),
    +        dstSize_tooSmall,
    +        "Not enough space to copy sequences");
    +
    +    ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes));
    +    for (i = 0; i < nbInSequences; ++i) {
    +        U32 rawOffset;
    +        outSeqs[i].litLength = inSeqs[i].litLength;
    +        outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH;
             outSeqs[i].rep = 0;
     
    +        /* Handle the possible single length >= 64K
    +         * There can only be one because we add MINMATCH to every match length,
    +         * and blocks are at most 128K.
    +         */
             if (i == seqStore->longLengthPos) {
                 if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                     outSeqs[i].litLength += 0x10000;
    @@ -3392,41 +3401,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
                 }
             }
     
    -        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
    -            /* Derive the correct offset corresponding to a repcode */
    -            outSeqs[i].rep = seqStoreSeqs[i].offBase;
    +        /* Determine the raw offset given the offBase, which may be a repcode. */
    +        if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) {
    +            const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase);
    +            assert(repcode > 0);
    +            outSeqs[i].rep = repcode;
                 if (outSeqs[i].litLength != 0) {
    -                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
    +                rawOffset = repcodes.rep[repcode - 1];
                 } else {
    -                if (outSeqs[i].rep == 3) {
    -                    rawOffset = updatedRepcodes.rep[0] - 1;
    +                if (repcode == 3) {
    +                    assert(repcodes.rep[0] > 1);
    +                    rawOffset = repcodes.rep[0] - 1;
                     } else {
    -                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
    +                    rawOffset = repcodes.rep[repcode];
                     }
                 }
    +        } else {
    +            rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase);
             }
             outSeqs[i].offset = rawOffset;
    -        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
    -           so we provide seqStoreSeqs[i].offset - 1 */
    -        ZSTD_updateRep(updatedRepcodes.rep,
    -                       seqStoreSeqs[i].offBase,
    -                       seqStoreSeqs[i].litLength == 0);
    -        literalsRead += outSeqs[i].litLength;
    +
    +        /* Update repcode history for the sequence */
    +        ZSTD_updateRep(repcodes.rep,
    +                       inSeqs[i].offBase,
    +                       inSeqs[i].litLength == 0);
    +
    +        nbOutLiterals += outSeqs[i].litLength;
         }
         /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
          * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
          * for the block boundary, according to the API.
          */
    -    assert(seqStoreLiteralsSize >= literalsRead);
    -    lastLLSize = seqStoreLiteralsSize - literalsRead;
    -    outSeqs[i].litLength = (U32)lastLLSize;
    -    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
    -    seqStoreSeqSize++;
    -    zc->seqCollector.seqIndex += seqStoreSeqSize;
    +    assert(nbInLiterals >= nbOutLiterals);
    +    {
    +        const size_t lastLLSize = nbInLiterals - nbOutLiterals;
    +        outSeqs[nbInSequences].litLength = (U32)lastLLSize;
    +        outSeqs[nbInSequences].matchLength = 0;
    +        outSeqs[nbInSequences].offset = 0;
    +        assert(nbOutSequences == nbInSequences + 1);
    +    }
    +    seqCollector->seqIndex += nbOutSequences;
    +    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
    +
    +    return 0;
     }
     
     size_t ZSTD_sequenceBound(size_t srcSize) {
    -    return (srcSize / ZSTD_MINMATCH_MIN) + 1;
    +    const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1;
    +    const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1;
    +    return maxNbSeq + maxNbDelims;
     }
     
     size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
    @@ -3435,6 +3458,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
         const size_t dstCapacity = ZSTD_compressBound(srcSize);
         void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
         SeqCollector seqCollector;
    +    {
    +        int targetCBlockSize;
    +        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), "");
    +        RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0");
    +    }
    +    {
    +        int nbWorkers;
    +        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), "");
    +        RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0");
    +    }
     
         RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
     
    @@ -3444,8 +3477,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
         seqCollector.maxSequences = outSeqsSize;
         zc->seqCollector = seqCollector;
     
    -    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
    -    ZSTD_customFree(dst, ZSTD_defaultCMem);
    +    {
    +        const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
    +        ZSTD_customFree(dst, ZSTD_defaultCMem);
    +        FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
    +    }
    +    assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize));
         return zc->seqCollector.seqIndex;
     }
     
    @@ -4038,8 +4075,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
             cSeqsSize = 1;
         }
     
    +    /* Sequence collection not supported when block splitting */
         if (zc->seqCollector.collectSequences) {
    -        ZSTD_copyBlockSequences(zc);
    +        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed");
             ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
             return 0;
         }
    @@ -4261,6 +4299,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
             if (bss == ZSTDbss_noCompress) {
                 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                     zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
    +            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
                 cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
                 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
                 DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
    @@ -4293,11 +4332,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
     
         {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
             FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
    -        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    +        if (bss == ZSTDbss_noCompress) {
    +            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
    +            cSize = 0;
    +            goto out;
    +        }
         }
     
         if (zc->seqCollector.collectSequences) {
    -        ZSTD_copyBlockSequences(zc);
    +        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed");
             ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
             return 0;
         }
    diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
    index e86fdb2bafd..86ccce31849 100644
    --- a/lib/compress/zstdmt_compress.c
    +++ b/lib/compress/zstdmt_compress.c
    @@ -121,7 +121,7 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
     
     static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
     {
    -    ZSTDMT_bufferPool* const bufPool = 
    +    ZSTDMT_bufferPool* const bufPool =
             (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
         if (bufPool==NULL) return NULL;
         if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
    @@ -380,7 +380,7 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
     static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
                                                   ZSTD_customMem cMem)
     {
    -    ZSTDMT_CCtxPool* const cctxPool = 
    +    ZSTDMT_CCtxPool* const cctxPool =
             (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
         assert(nbWorkers > 0);
         if (!cctxPool) return NULL;
    diff --git a/lib/zstd.h b/lib/zstd.h
    index ba611656ff6..9b3876126c5 100644
    --- a/lib/zstd.h
    +++ b/lib/zstd.h
    @@ -1538,25 +1538,38 @@ typedef enum {
     ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
     
     /*! ZSTD_generateSequences() :
    + * WARNING: This function is meant for debugging and informational purposes ONLY!
    + * Its implementation is flawed, and it will be deleted in a future version.
    + * It is not guaranteed to succeed, as there are several cases where it will give
    + * up and fail. You should NOT use this function in production code.
    + *
    + * This function is deprecated, and will be removed in a future version.
    + *
      * Generate sequences using ZSTD_compress2(), given a source buffer.
      *
    + * @param zc The compression context to be used for ZSTD_compress2(). Set any
    + *           compression parameters you need on this context.
    + * @param outSeqs The output sequences buffer of size @p outSeqsSize
    + * @param outSeqsSize The size of the output sequences buffer.
    + *                    ZSTD_sequenceBound(srcSize) is an upper bound on the number
    + *                    of sequences that can be generated.
    + * @param src The source buffer, of size @p srcSize, to generate sequences from.
    + * @param srcSize The size of the source buffer.
    + *
      * Each block will end with a dummy sequence
      * with offset == 0, matchLength == 0, and litLength == length of last literals.
      * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
      * simply acts as a block delimiter.
      *
    - * @zc can be used to insert custom compression params.
    - * This function invokes ZSTD_compress2().
    - *
    - * The output of this function can be fed into ZSTD_compressSequences() with CCtx
    - * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
    - * @return : number of sequences generated
    + * @returns The number of sequences generated, which never exceeds
    + *          ZSTD_sequenceBound(srcSize), or an error code that can be checked
    + *          with ZSTD_isError().
      */
    -
    +ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
     ZSTDLIB_STATIC_API size_t
    -ZSTD_generateSequences( ZSTD_CCtx* zc,
    -                        ZSTD_Sequence* outSeqs, size_t outSeqsSize,
    -                        const void* src, size_t srcSize);
    +ZSTD_generateSequences(ZSTD_CCtx* zc,
    +                       ZSTD_Sequence* outSeqs, size_t outSeqsSize,
    +                       const void* src, size_t srcSize);
     
     /*! ZSTD_mergeBlockDelimiters() :
      * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
    diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
    index 2f5a25fb114..430f6df15cf 100644
    --- a/tests/fuzz/Makefile
    +++ b/tests/fuzz/Makefile
    @@ -125,7 +125,8 @@ FUZZ_TARGETS :=       \
     	seekable_roundtrip \
     	huf_round_trip \
     	huf_decompress \
    -	decompress_cross_format
    +	decompress_cross_format \
    +	generate_sequences
     
     all: libregression.a $(FUZZ_TARGETS)
     
    @@ -239,10 +240,13 @@ huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
     
     huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o
     	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@
    -	
    +
     decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o
     	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@
     
    +generate_sequences: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_generate_sequences.o
    +	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_generate_sequences.o $(LIB_FUZZING_ENGINE) -o $@
    +
     libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
     	$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
     
    diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
    index f321002a7a0..d6b99171f66 100755
    --- a/tests/fuzz/fuzz.py
    +++ b/tests/fuzz/fuzz.py
    @@ -66,6 +66,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD):
         'huf_round_trip': TargetInfo(InputType.RAW_DATA),
         'huf_decompress': TargetInfo(InputType.RAW_DATA),
         'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    +    'generate_sequences': TargetInfo(InputType.RAW_DATA),
     }
     TARGETS = list(TARGET_INFO.keys())
     ALL_TARGETS = TARGETS + ['all']
    diff --git a/tests/fuzz/generate_sequences.c b/tests/fuzz/generate_sequences.c
    new file mode 100644
    index 00000000000..1cc57e844c7
    --- /dev/null
    +++ b/tests/fuzz/generate_sequences.c
    @@ -0,0 +1,88 @@
    +/*
    + * Copyright (c) Meta Platforms, Inc. and affiliates.
    + * All rights reserved.
    + *
    + * This source code is licensed under both the BSD-style license (found in the
    + * LICENSE file in the root directory of this source tree) and the GPLv2 (found
    + * in the COPYING file in the root directory of this source tree).
    + * You may select, at your option, one of the above-listed licenses.
    + */
    +
    +#define ZSTD_STATIC_LINKING_ONLY
    +
    +#include <stddef.h>
    +#include <stdint.h>
    +#include <string.h>
    +#include <stdlib.h>
    +
    +#include "fuzz_data_producer.h"
    +#include "fuzz_helpers.h"
    +#include "zstd_helpers.h"
    +
    +/**
    + * This fuzz target ensures that ZSTD_generateSequences() does not crash and,
    + * if it succeeds, that ZSTD_compressSequences() round trips its output.
    + */
    +
    +static void testRoundTrip(ZSTD_CCtx* cctx, ZSTD_Sequence const* seqs, size_t nbSeqs, const void* src, size_t srcSize) {
    +  /* Compress the sequences with block delimiters */
    +  const size_t compressBound = ZSTD_compressBound(srcSize);
    +  void* dst = FUZZ_malloc(compressBound);
    +  FUZZ_ASSERT(dst);
    +
    +  size_t compressedSize = ZSTD_compressSequences(cctx, dst, compressBound, seqs, nbSeqs, src, srcSize);
    +  FUZZ_ZASSERT(compressedSize);
    +
    +  void* decompressed = FUZZ_malloc(srcSize);
    +  FUZZ_ASSERT(srcSize == 0 || decompressed);
    +  size_t decompressedSize = ZSTD_decompress(decompressed, srcSize, dst, compressedSize);
    +  FUZZ_ZASSERT(decompressedSize);
    +  FUZZ_ASSERT(decompressedSize == srcSize);
    +  if (srcSize != 0) {
    +    FUZZ_ASSERT(!memcmp(src, decompressed, srcSize));
    +  }
    +
    +  free(decompressed);
    +  free(dst);
    +}
    +
    +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
    +
    +  FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
    +  size = FUZZ_dataProducer_reserveDataPrefix(producer);
    +
    +  ZSTD_CCtx* cctx = ZSTD_createCCtx();
    +  FUZZ_ASSERT(cctx);
    +
    +  const size_t seqsCapacity = FUZZ_dataProducer_uint32Range(producer, 0, 2 * ZSTD_sequenceBound(size));
    +  ZSTD_Sequence* seqs = (ZSTD_Sequence*)FUZZ_malloc(sizeof(ZSTD_Sequence) * seqsCapacity);
    +  FUZZ_ASSERT(seqsCapacity == 0 || seqs);
    +
    +  FUZZ_setRandomParameters(cctx, size, producer);
    +  FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 0));
    +  FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0));
    +
    +  const size_t nbSeqs = ZSTD_generateSequences(cctx, seqs, seqsCapacity, data, size);
    +  if (ZSTD_isError(nbSeqs)) {
    +    /* Allowed to error if the destination is too small */
    +    if (ZSTD_getErrorCode(nbSeqs) == ZSTD_error_dstSize_tooSmall) {
    +        FUZZ_ASSERT(seqsCapacity < ZSTD_sequenceBound(size));
    +    }
    +  } else {
    +    /* Ensure we round trip with and without block delimiters */
    +
    +    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters));
    +    testRoundTrip(cctx, seqs, nbSeqs, data, size);
    +
    +    const size_t nbMergedSeqs = ZSTD_mergeBlockDelimiters(seqs, nbSeqs);
    +    FUZZ_ASSERT(nbMergedSeqs <= nbSeqs);
    +    FUZZ_ZASSERT(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only));
    +    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters));
    +    testRoundTrip(cctx, seqs, nbMergedSeqs, data, size);
    +  }
    +
    +  free(seqs);
    +  ZSTD_freeCCtx(cctx);
    +  FUZZ_dataProducer_free(producer);
    +  return 0;
    +}
    diff --git a/tests/fuzzer.c b/tests/fuzzer.c
    index 09349218111..f7bdae90e9a 100644
    --- a/tests/fuzzer.c
    +++ b/tests/fuzzer.c
    @@ -3701,6 +3701,31 @@ static int basicUnitTests(U32 const seed, double compressibility)
         }
         DISPLAYLEVEL(3, "OK \n");
     
    +    DISPLAYLEVEL(3, "test%3i : ZSTD_generateSequences too small output buffer : ", testNb++);
    +    {
    +        const size_t seqsCapacity = 10;
    +        const size_t srcSize = 150 KB;
    +        const BYTE* src = (BYTE*)CNBuffer;
    +
    +        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    +        ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(seqsCapacity * sizeof(ZSTD_Sequence));
    +
    +        if (seqs == NULL) goto _output_error;
    +        if (cctx == NULL) goto _output_error;
    +        /* Populate src with random data */
    +        RDG_genBuffer(CNBuffer, srcSize, compressibility, 0.5, seed);
    +
    +        /* The sequence buffer is deliberately too small, so this call must fail */
    +        {
    +            size_t const seqsSize = ZSTD_generateSequences(cctx, seqs, seqsCapacity, src, srcSize);
    +            if (!ZSTD_isError(seqsSize)) goto _output_error;
    +        }
    +
    +        ZSTD_freeCCtx(cctx);
    +        free(seqs);
    +    }
    +    DISPLAYLEVEL(3, "OK \n");
    +
         DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences followed by ZSTD_compressSequences : ", testNb++);
         {
             const size_t srcSize = 500 KB;
    
    From dc1f7b560b23f5bd50a0fcddd677007c9c76ec0b Mon Sep 17 00:00:00 2001
    From: Elliot Gorokhovsky 
    Date: Thu, 21 Mar 2024 15:16:38 -0400
    Subject: [PATCH 282/283] fix -Werror=pointer-arith in fuzzers (#3983)
    
    ---
     tests/fuzz/decompress_cross_format.c | 8 ++++----
     1 file changed, 4 insertions(+), 4 deletions(-)
    
    diff --git a/tests/fuzz/decompress_cross_format.c b/tests/fuzz/decompress_cross_format.c
    index 78461e697b1..da10702a8ac 100644
    --- a/tests/fuzz/decompress_cross_format.c
    +++ b/tests/fuzz/decompress_cross_format.c
    @@ -27,15 +27,15 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
         // The rest will be interpreted as magicless compressed data.
         FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
         size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer);
    -    const void* const magiclessSrc = src;
    +    const uint8_t* const magiclessSrc = src;
         size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
    -    void* const standardDst = FUZZ_malloc(dstSize);
    -    void* const magiclessDst = FUZZ_malloc(dstSize);
    +    uint8_t* const standardDst = (uint8_t*)FUZZ_malloc(dstSize);
    +    uint8_t* const magiclessDst = (uint8_t*)FUZZ_malloc(dstSize);
     
         // Create standard-format src from magicless-format src
         const uint32_t zstd_magic = ZSTD_MAGICNUMBER;
         size_t standardSize = sizeof(zstd_magic) + magiclessSize;
    -    void* const standardSrc = FUZZ_malloc(standardSize);
    +    uint8_t* const standardSrc = (uint8_t*)FUZZ_malloc(standardSize);
         memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine
         memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize);
     
    
    From 273d1279cab66ac9bccc862da17e35ee547d7610 Mon Sep 17 00:00:00 2001
    From: Yann Collet 
    Date: Thu, 21 Mar 2024 13:25:48 -0700
    Subject: [PATCH 283/283] try to silence some scorecard warnings
    
    ---
     .github/workflows/commit.yml  | 1 +
     .github/workflows/nightly.yml | 1 +
     2 files changed, 2 insertions(+)
    
    diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml
    index 5fc8cb1fd5e..25d8c52f9ef 100644
    --- a/.github/workflows/commit.yml
    +++ b/.github/workflows/commit.yml
    @@ -3,6 +3,7 @@ on:
       push:
         branches:
         - dev
    +permissions: read-all
     jobs:
       short-tests-0:
         runs-on: ubuntu-latest
    diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
    index 9206a07fd50..704e78922c3 100644
    --- a/.github/workflows/nightly.yml
    +++ b/.github/workflows/nightly.yml
    @@ -7,6 +7,7 @@ on:
         - release
         - dev
         - master
    +permissions: read-all
     jobs:
       regression-test:
         runs-on: ubuntu-latest