From b918109005702681af21887c1fa6c9610b650d78 Mon Sep 17 00:00:00 2001
From: slaren <2141330+slaren@users.noreply.github.com>
Date: Wed, 10 May 2023 23:27:59 +0200
Subject: [PATCH 1/2] testing clang-tidy

test

test

test

test

test

test

Revert "test"

This reverts commit 6aa68df4c38798d99f891891e0cbd702c532edb1.

test

test

test
---
 .clang-tidy                        |  19 +
 .github/workflows/build.yml        | 589 -----------------------------
 .github/workflows/docker.yml       |  65 ----
 .github/workflows/editorconfig.yml |  17 -
 .github/workflows/tidy-review.yml  |  23 ++
 5 files changed, 42 insertions(+), 671 deletions(-)
 create mode 100644 .clang-tidy
 delete mode 100644 .github/workflows/build.yml
 delete mode 100644 .github/workflows/docker.yml
 delete mode 100644 .github/workflows/editorconfig.yml
 create mode 100644 .github/workflows/tidy-review.yml

diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 00000000000..d123553ef32
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,19 @@
+---  # clang-tidy configuration; in Checks, a leading '-' removes a check from an enabled group
+Checks: >
+    bugprone-*,
+    -bugprone-easily-swappable-parameters,
+    -bugprone-implicit-widening-of-multiplication-result,
+    -bugprone-narrowing-conversions,
+    readability-*,
+    -readability-avoid-unconditional-preprocessor-if,
+    -readability-function-cognitive-complexity,
+    -readability-identifier-length,
+    -readability-implicit-bool-conversion,
+    -readability-magic-numbers,
+    -readability-uppercase-literal-suffix,
+    clang-analyzer-*,
+    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
+    concurrency-*,
+    performance-*,
+    portability-*,
+FormatStyle: none  # do not reformat code around applied fixes
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index a5938bf9368..00000000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,589 +0,0 @@
-name: CI
-
-on:
-  workflow_dispatch: # allows manual triggering
-    inputs:
-      create_release:
-        description: 'Create new release'
-        required: true
-        type: boolean
-  push:
-    branches:
-      - master
-    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
-  pull_request:
-    types: [opened, synchronize, reopened]
-    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
-
-env:
- BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
-
-jobs:
-  ubuntu-focal-make:
-    runs-on: ubuntu-20.04
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v1
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential gcc-8
-
-      - name: Build
-        id: make_build
-        run: |
-          CC=gcc-8 make
-
-  ubuntu-latest-cmake:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v1
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential
-
-      - name: Build
-        id: cmake_build
-        run: |
-          mkdir build
-          cd build
-          cmake ..
-          cmake --build . --config Release
-
-      - name: Test
-        id: cmake_test
-        run: |
-          cd build
-          ctest --verbose
-
-  ubuntu-latest-cmake-sanitizer:
-    runs-on: ubuntu-latest
-
-    continue-on-error: true
-
-    strategy:
-      matrix:
-        sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        build_type: [Debug, Release]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v1
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential
-
-      - name: Build
-        id: cmake_build
-        run: |
-          mkdir build
-          cd build
-          cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
-          cmake --build . --config ${{ matrix.build_type }}
-
-      - name: Test
-        id: cmake_test
-        run: |
-          cd build
-          ctest --verbose
-
-  macOS-latest-make:
-    runs-on: macos-latest
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v1
-
-      - name: Dependencies
-        id: depends
-        run: |
-          brew update
-
-      - name: Build
-        id: make_build
-        run: |
-          make
-
-  macOS-latest-cmake:
-    runs-on: macos-latest
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v1
-
-      - name: Dependencies
-        id: depends
-        run: |
-          brew update
-
-      - name: Build
-        id: cmake_build
-        run: |
-          mkdir build
-          cd build
-          cmake -DLLAMA_AVX2=OFF ..
-          cmake --build . --config Release
-
-      - name: Test
-        id: cmake_test
-        run: |
-          cd build
-          ctest --verbose
-
-  windows-latest-cmake:
-    runs-on: windows-latest
-    env:
-      OPENBLAS_VERSION: 0.3.23
-      OPENCL_VERSION: 2023.04.17
-      CLBLAST_VERSION: 1.5.3
-
-    strategy:
-      matrix:
-        include:
-          - build: 'avx2'
-            defines: ''
-          - build: 'avx'
-            defines: '-DLLAMA_AVX2=OFF'
-          - build: 'avx512'
-            defines: '-DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
-          - build: 'clblast'
-            defines: '-DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
-          - build: 'openblas'
-            defines: '-DLLAMA_OPENBLAS=ON -DBLAS_LIBRARIES="/LIBPATH:$env:RUNNER_TEMP/openblas/lib" -DOPENBLAS_INC="$env:RUNNER_TEMP/openblas/include"'
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v1
-
-      - name: Download OpenCL SDK
-        id: get_opencl
-        if: ${{ matrix.build == 'clblast' }}
-        run: |
-          curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip"
-          mkdir $env:RUNNER_TEMP/opencl
-          tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl
-
-      - name: Download CLBlast
-        id: get_clblast
-        if: ${{ matrix.build == 'clblast' }}
-        run: |
-          curl.exe -o $env:RUNNER_TEMP/clblast.zip -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-Windows-x64.zip"
-          curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
-          mkdir $env:RUNNER_TEMP/clblast
-          tar.exe -xvf $env:RUNNER_TEMP/clblast.zip -C $env:RUNNER_TEMP/clblast
-          foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
-            $txt = Get-Content -Path $f -Raw
-            $txt.Replace('C:/dependencies/opencl/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
-          }
-
-      - name: Download OpenBLAS
-        id: get_openblas
-        if: ${{ matrix.build == 'openblas' }}
-        run: |
-          curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
-          curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
-          mkdir $env:RUNNER_TEMP/openblas
-          tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
-          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
-          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
-          $lib =  $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
-          & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
-
-      - name: Build
-        id: cmake_build
-        run: |
-          mkdir build
-          cd build
-          cmake .. ${{ matrix.defines }}
-          cmake --build . --config Release
-          cp ../LICENSE ./bin/Release/llama.cpp.txt
-
-      - name: Add clblast.dll
-        id: add_clblast_dll
-        if: ${{ matrix.build == 'clblast' }}
-        run: |
-          cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release
-          cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt
-
-      - name: Add libopenblas.dll
-        id: add_libopenblas_dll
-        if: ${{ matrix.build == 'openblas' }}
-        run: |
-          cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
-          cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
-
-      - name: Check AVX512F support
-        id: check_avx512f
-        if: ${{ matrix.build == 'avx512' }}
-        continue-on-error: true
-        run: |
-          cd build
-          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
-          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
-          $cl =  $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
-          echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
-          & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
-          .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
-
-      - name: Test
-        id: cmake_test
-        if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
-        run: |
-          cd build
-          ctest -C Release --verbose
-
-      - name: Get commit hash
-        id: commit
-        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-        uses: pr-mpt/actions-commit-hash@v2
-
-      - name: Pack artifacts
-        id: pack_artifacts
-        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-        run: |
-          7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip .\build\bin\Release\*
-
-      - name: Upload artifacts
-        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-        uses: actions/upload-artifact@v3
-        with:
-          path: |
-            llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip
-
-  windows-latest-cmake-cublas:
-    runs-on: windows-latest
-
-    strategy:
-      matrix:
-        cuda: ['12.1.0', '11.7.1']
-        build: ['cublas']
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v1
-
-      - uses: Jimver/cuda-toolkit@v0.2.10
-        id: cuda-toolkit
-        with:
-          cuda: ${{ matrix.cuda }}
-          # TODO(green-sky): _dev seems to fail, and non dev are not enought
-          #sub-packages: '["nvcc", "cudart", "cublas", "cudart_dev", "cublas_dev"]'
-
-      - name: Build
-        id: cmake_build
-        run: |
-          mkdir build
-          cd build
-          cmake .. -DLLAMA_CUBLAS=ON
-          cmake --build . --config Release
-
-      - name: Get commit hash
-        id: commit
-        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-        uses: pr-mpt/actions-commit-hash@v2
-
-      - name: Pack artifacts
-        id: pack_artifacts
-        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-        run: |
-          7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
-
-      - name: Upload artifacts
-        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-        uses: actions/upload-artifact@v3
-        with:
-          path: |
-            llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
-
-      - name: Copy and pack Cuda runtime
-        if: ${{ matrix.cuda == '12.1.0' }}
-        # TODO(green-sky): paths are cuda 12 specific
-        run: |
-          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
-          mkdir '.\build\bin\cudart\'
-          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_12.dll" '.\build\bin\cudart\'
-          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_12.dll" '.\build\bin\cudart\'
-          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_12.dll" '.\build\bin\cudart\'
-          7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\*
-
-      - name: Copy and pack Cuda runtime
-        if: ${{ matrix.cuda == '11.7.1' }}
-        # TODO(green-sky): paths are cuda 11 specific
-        run: |
-          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
-          mkdir '.\build\bin\cudart\'
-          ls "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin"
-          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_110.dll" '.\build\bin\cudart\'
-          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_11.dll" '.\build\bin\cudart\'
-          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_11.dll" '.\build\bin\cudart\'
-          7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\*
-
-      - name: Upload Cuda runtime
-        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-        uses: actions/upload-artifact@v3
-        with:
-          path: |
-            cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
-
-  release:
-    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
-
-    runs-on: ubuntu-latest
-
-    needs:
-      - ubuntu-focal-make
-      - ubuntu-latest-cmake
-      - macOS-latest-make
-      - macOS-latest-cmake
-      - windows-latest-cmake
-      - windows-latest-cmake-cublas
-
-    steps:
-      - name: Download artifacts
-        id: download-artifact
-        uses: actions/download-artifact@v3
-
-      - name: Get commit hash
-        id: commit
-        uses: pr-mpt/actions-commit-hash@v2
-
-      - name: Create release
-        id: create_release
-        uses: anzz1/action-create-release@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          tag_name: ${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}
-
-      - name: Upload release
-        id: upload_release
-        uses: actions/github-script@v3
-        with:
-          github-token: ${{secrets.GITHUB_TOKEN}}
-          script: |
-            const path = require('path');
-            const fs = require('fs');
-            const release_id = '${{ steps.create_release.outputs.id }}';
-            for (let file of await fs.readdirSync('./artifact')) {
-              if (path.extname(file) === '.zip') {
-                console.log('uploadReleaseAsset', file);
-                await github.repos.uploadReleaseAsset({
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
-                  release_id: release_id,
-                  name: file,
-                  data: await fs.readFileSync(`./artifact/${file}`)
-                });
-              }
-            }
-
-#  ubuntu-latest-gcc:
-#    runs-on: ubuntu-latest
-#
-#    strategy:
-#      matrix:
-#        build: [Debug, Release]
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v1
-#
-#      - name: Dependencies
-#        run: |
-#          sudo apt-get update
-#          sudo apt-get install build-essential
-#          sudo apt-get install cmake
-#
-#      - name: Configure
-#        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-#
-#      - name: Build
-#        run: |
-#          make
-#
-#  ubuntu-latest-clang:
-#    runs-on: ubuntu-latest
-#
-#    strategy:
-#      matrix:
-#        build: [Debug, Release]
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v1
-#
-#      - name: Dependencies
-#        run: |
-#          sudo apt-get update
-#          sudo apt-get install build-essential
-#          sudo apt-get install cmake
-#
-#      - name: Configure
-#        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
-#
-#      - name: Build
-#        run: |
-#          make
-#
-#  ubuntu-latest-gcc-sanitized:
-#    runs-on: ubuntu-latest
-#
-#    strategy:
-#      matrix:
-#        sanitizer: [ADDRESS, THREAD, UNDEFINED]
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v1
-#
-#      - name: Dependencies
-#        run: |
-#          sudo apt-get update
-#          sudo apt-get install build-essential
-#          sudo apt-get install cmake
-#
-#      - name: Configure
-#        run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
-#
-#      - name: Build
-#        run: |
-#          make
-#
-#  windows:
-#    runs-on: windows-latest
-#
-#    strategy:
-#      matrix:
-#        build: [Release]
-#        arch: [Win32, x64]
-#        include:
-#          - arch: Win32
-#            s2arc: x86
-#          - arch: x64
-#            s2arc: x64
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v1
-#
-#      - name: Add msbuild to PATH
-#        uses: microsoft/setup-msbuild@v1
-#
-#      - name: Configure
-#        run: >
-#          cmake -S . -B ./build -A ${{ matrix.arch }}
-#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-#
-#      - name: Build
-#        run: |
-#          cd ./build
-#          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-#
-#      - name: Upload binaries
-#        uses: actions/upload-artifact@v1
-#        with:
-#          name: llama-bin-${{ matrix.arch }}
-#          path: build/bin/${{ matrix.build }}
-#
-#  windows-blas:
-#    runs-on: windows-latest
-#
-#    strategy:
-#      matrix:
-#        build: [Release]
-#        arch: [Win32, x64]
-#        blas: [ON]
-#        include:
-#          - arch: Win32
-#            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
-#            s2arc: x86
-#          - arch: x64
-#            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
-#            s2arc: x64
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v1
-#
-#      - name: Add msbuild to PATH
-#        uses: microsoft/setup-msbuild@v1
-#
-#      - name: Fetch OpenBLAS
-#        if: matrix.blas == 'ON'
-#        run: |
-#          C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
-#          7z x blas.zip -oblas -y
-#          copy blas/include/cblas.h .
-#          copy blas/include/openblas_config.h .
-#          echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
-#
-#      - name: Configure
-#        run: >
-#          cmake -S . -B ./build -A ${{ matrix.arch }}
-#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-#          -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
-#          -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
-#
-#      - name: Build
-#        run: |
-#          cd ./build
-#          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-#
-#      - name: Copy libopenblas.dll
-#        if: matrix.blas == 'ON'
-#        run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
-#
-#      - name: Upload binaries
-#        if: matrix.blas == 'ON'
-#        uses: actions/upload-artifact@v1
-#        with:
-#          name: llama-blas-bin-${{ matrix.arch }}
-#          path: build/bin/${{ matrix.build }}
-#
-#  emscripten:
-#    runs-on: ubuntu-latest
-#
-#    strategy:
-#      matrix:
-#        build: [Release]
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v1
-#
-#      - name: Dependencies
-#        run: |
-#          wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
-#          tar -xvf master.tar.gz
-#          emsdk-master/emsdk update
-#          emsdk-master/emsdk install latest
-#          emsdk-master/emsdk activate latest
-#
-#      - name: Configure
-#        run: echo "tmp"
-#
-#      - name: Build
-#        run: |
-#          pushd emsdk-master
-#          source ./emsdk_env.sh
-#          popd
-#          emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-#          make
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
deleted file mode 100644
index 379fbd7ad35..00000000000
--- a/.github/workflows/docker.yml
+++ /dev/null
@@ -1,65 +0,0 @@
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-# GitHub recommends pinning actions to a commit SHA.
-# To get a newer version, you will need to update the SHA.
-# You can also reference a tag or branch, but the action may change without warning.
-
-name: Publish Docker image
-
-on:
-  pull_request:
-  push:
-    branches:
-      - master
-
-jobs:
-  push_to_registry:
-    name: Push Docker image to Docker Hub
-    if: github.event.pull_request.draft == false
-
-    runs-on: ubuntu-latest
-    env:
-      COMMIT_SHA: ${{ github.sha }}
-    strategy:
-      matrix:
-        config:
-          - { tag: "light", dockerfile: ".devops/main.Dockerfile" }
-          - { tag: "full", dockerfile: ".devops/full.Dockerfile" }
-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v3
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Log in to Docker Hub
-        uses: docker/login-action@v2
-        with:
-          registry: ghcr.io
-          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Build and push Docker image (versioned)
-        if: github.event_name == 'push'
-        uses: docker/build-push-action@v4
-        with:
-          context: .
-          push: true
-          platforms: linux/amd64,linux/arm64
-          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
-          file: ${{ matrix.config.dockerfile }}
-
-      - name: Build and push Docker image (tagged)
-        uses: docker/build-push-action@v4
-        with:
-          context: .
-          push: ${{ github.event_name == 'push' }}
-          platforms: linux/amd64,linux/arm64
-          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
-          file: ${{ matrix.config.dockerfile }}
diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml
deleted file mode 100644
index b4e535acf1f..00000000000
--- a/.github/workflows/editorconfig.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-name: EditorConfig Checker
-
-on:
-  push:
-    branches:
-      - master
-  pull_request:
-    branches:
-      - master
-
-jobs:
-  editorconfig:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - uses: editorconfig-checker/action-editorconfig-checker@main
-      - run: editorconfig-checker
diff --git a/.github/workflows/tidy-review.yml b/.github/workflows/tidy-review.yml
new file mode 100644
index 00000000000..1f7c7939992
--- /dev/null
+++ b/.github/workflows/tidy-review.yml
@@ -0,0 +1,23 @@
+# Runs clang-tidy on pull requests targeting master and posts the findings
+# as review comments. Only pull_request is used as a trigger: the review
+# action needs a pull request to comment on, so a push run has no target.
+name: clang-tidy review
+
+on:
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  clang-tidy-review:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - uses: ZedThree/clang-tidy-review@v0.13.0
+      id: review
+      with:
+        lgtm_comment_body: ''  # empty: no comment when nothing is found (TODO confirm against action docs)
+        build_dir: build  # must match the -B directory in cmake_command
+        cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on

From ac2b781c119dabc8488e50497609a06009837fe8 Mon Sep 17 00:00:00 2001
From: slaren <2141330+slaren@users.noreply.github.com>
Date: Thu, 11 May 2023 20:39:11 +0200
Subject: [PATCH 2/2] test

---
 examples/benchmark/benchmark-matmult.cpp | 524 +++++++++++------------
 1 file changed, 262 insertions(+), 262 deletions(-)

diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 6117ae3abf8..32d8ae73c33 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -1,262 +1,262 @@
-#include <locale.h>
-#include "ggml.h"
-#include "build-info.h"
-#include <assert.h>
-#include <math.h>
-#include <cstring>
-#include <cstdio>
-#include <cinttypes>
-#include <unordered_map>
-#include <queue>
-#include <string.h>
-#include <cassert>
-#include <fstream>
-#include <string>
-#include <iterator>
-#include <algorithm>
-
-float tensor_sum_elements(struct ggml_tensor * tensor) {
-    float sum = 0;
-    if (tensor->type==GGML_TYPE_F32) {
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            for (int k = 0; k < tensor->ne[0]; k++) {
-                sum +=  ((float *) tensor->data)[j*tensor->ne[0]+k];
-            }
-        }
-    }
-    return sum;
-}
-
-
-/*
-    These are mapping to unknown
-    GGML_TYPE_I8,
-    GGML_TYPE_I16,
-    GGML_TYPE_I32,
-    GGML_TYPE_COUNT,
-*/
-
-#define TENSOR_TYPE_AS_STR(TYPE) TYPE == GGML_TYPE_F32 ? "FP32" : TYPE == GGML_TYPE_F16 ? "FP16" : TYPE == GGML_TYPE_Q4_0 ? "Q4_0" : TYPE == GGML_TYPE_Q4_1 ? "Q4_1" : "UNKNOWN"
-
-#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", #TENSOR, \
-        TENSOR->type,TENSOR_TYPE_AS_STR(TENSOR->type),\
-        (int) TENSOR->ne[0], (int) TENSOR->ne[1], (int) TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \
-    { float sum = tensor_sum_elements(TENSOR); printf("Sum of tensor %s is %6.2f\n",#TENSOR, sum); }
-
-struct benchmark_params_struct {
-    int32_t n_threads     = 1;
-    int32_t n_iterations  = 10;
-};
-
-void print_usage(int /*argc*/, char ** argv, struct benchmark_params_struct params) {
-    fprintf(stderr, "usage: %s [options]\n", argv[0]);
-    fprintf(stderr, "\n");
-    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h, --help            show this help message and exit\n");
-    fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
-    fprintf(stderr, "  -i N, --iter N     number of iterations to use during computation (default: %d)\n", params.n_iterations);
-    fprintf(stderr, "\n");
-}
-
-int main(int argc, char ** argv)  {
-
-
-    struct benchmark_params_struct benchmark_params;
-
-    bool invalid_param = false;
-    std::string arg;
-    for (int i = 1; i < argc; i++) {
-        arg = argv[i];
-
-        if (arg == "-t" || arg == "--threads") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            benchmark_params.n_threads = std::stoi(argv[i]);
-        } else if (arg == "-i" || arg == "--iter") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            benchmark_params.n_iterations = std::stoi(argv[i]);
-        }  else if (arg == "-h" || arg == "--help") {
-            print_usage(argc, argv, benchmark_params);
-            exit(0);
-        }
-        if (invalid_param) {
-            fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
-            print_usage(argc, argv, benchmark_params);
-            exit(1);
-        }
-    }
-
-    fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
-    printf("Starting Test\n");
-
-    // create the ggml context
-    struct ggml_context * ctx;
-    //const int sizex = 4096;
-    //const int sizey = 11008;
-
-#undef VERBOSE_DEBUGGING
-#ifndef VERBOSE_DEBUGGING
-    const int sizey = 4096;
-    const int sizex = 11008;
-    const int sizez = 128;
-#else
-    /* Working - let's increase size */
-    const int sizey = 1;
-    const int sizex = (8*32);
-    const int sizez = 1;
-
-    /*const int sizey = 1;
-    const int sizex = 3*(8*32);
-    const int sizez = 1;*/
-#endif
-
-    //printf("Memsize required = %i\n", sizex*sizex);
-
-    size_t ctx_size = 0;
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
-    ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32);
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_Q4_0);
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_Q4_0);
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
-    ctx_size += 1024*1024*16;
-
-    printf("Allocating Memory of size %li bytes, %li MB\n",ctx_size, (ctx_size/1024/1024));
-
-    struct ggml_init_params params = {
-        /*.mem_size   =*/ ctx_size,
-        /*.mem_buffer =*/ NULL,
-        /* no_alloc   =*/ 0
-    };
-
-    ctx = ggml_init(params);
-    if (!ctx) {
-        fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-        return 1;
-    }
-
-
-    printf("Creating new tensors\n");
-    // printf("Creating new tensor m1\n");
-    struct ggml_tensor * m11 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizey);
-    ggml_set_f32(m11, 1.0f);
-
-    // printf("Creating new tensor m1\n");
-    struct ggml_tensor * m12 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizey);
-    ggml_set_f32(m12, 1.5f);
-
-    // printf("Creating new tensor m2\n");
-    struct ggml_tensor * m2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizez);
-    ggml_set_f32(m2, 2.0f);
-
-    printf("\n------ Test 1 - Matrix Mult via F32 code ------------------------------------------------------------------------------\n");
-    // printf("Creating new tensor m11xm2\n");
-    struct ggml_tensor * m11xm2 = ggml_mul_mat(ctx, m11, m2);
-
-    // printf("Creating compute graph\n");
-    struct ggml_cgraph gf = ggml_build_forward(m11xm2);
-
-    gf.n_threads=benchmark_params.n_threads;
-    printf("cgraph->n_threads=%i\n",gf.n_threads);
-
-    TENSOR_DUMP(m11);
-    TENSOR_DUMP(m2);
-
-    ggml_graph_compute(ctx, &gf);
-
-    TENSOR_DUMP(gf.nodes[0]);
-
-    printf("\n------ Test 2 - Matrix Mult via Q4_0 code ------------------------------------------------------------------------------\n");
-
-    int32_t nelements = sizex*sizey;
-    int32_t ne[2] = { sizex, sizey };
-
-    std::vector<int64_t> hist_cur(1 << 4, 0);
-
-    // Set up a the benchmark matrices
-    // printf("Creating new tensor q11 & Running quantize\n");
-    struct ggml_tensor * q11 = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, sizex, sizey);
-    ggml_quantize_q4_0((const float *) m11->data, q11->data, nelements, ne[0], hist_cur.data());
-
-    // Set up a the compute graph
-    // printf("Creating new tensor q31\n");
-    struct ggml_tensor * q31 = ggml_mul_mat(ctx, q11, m2);
-
-    // printf("Creating compute graph\n");
-    struct ggml_cgraph gf31 = ggml_build_forward(q31);
-    gf31.n_threads=benchmark_params.n_threads;
-
-    // Set up a second graph computation to make sure we override the CPU cache lines
-    // printf("Creating new tensor q12 & Running quantize\n");
-    struct ggml_tensor * q12 = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, sizex, sizey);
-    ggml_quantize_q4_0((const float *) m12->data, q12->data, nelements, ne[0], hist_cur.data());
-
-    // printf("Creating new tensor q32\n");
-    struct ggml_tensor * q32 = ggml_mul_mat(ctx, q12, m2);
-
-    //printf("Creating compute graph\n");
-    struct ggml_cgraph gf32 = ggml_build_forward(q32);
-    gf32.n_threads=benchmark_params.n_threads;
-    printf("cgraph->n_threads=%i\n",gf31.n_threads);
-
-    const int dimx = sizex;
-    const int dimy = sizey;
-    const int dimz = sizez;
-    long long int flops_per_dot_product = dimy + dimy;
-    long long int flops_per_matrix = flops_per_dot_product * dimx * dimz; ;
-    printf("Matrix Multiplication of (%i,%i,%i) x (%i,%i,%i) - about %6.2f gFLOPS\n\n", sizex, sizey, 1, sizex, sizez, 1, 1.0f*flops_per_matrix / 1000 / 1000 / 1000);
-
-
-    // Let's use the F32 result from above as a reference for the q4_0 multiplication
-    float sum_of_F32_reference = tensor_sum_elements(gf.nodes[0]);
-
-
-    printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; FLOPS_per_u_Second\n");
-    printf("==============================================================================================\n");
-
-    for (int i=0;i<benchmark_params.n_iterations ;i++) {
-
-        long long int start = ggml_time_us();
-        //printf("Running ggml_graph_compute\n");
-        ggml_graph_compute(ctx, &gf31);
-        long long int stop = ggml_time_us();
-        long long int usec = stop-start;
-        float flops_per_usec = (1.0f*flops_per_matrix)/usec;
-        printf("%9i;%8i;%6i;%6i;%6i;%15lli;%18lli;%19.2f\n",
-            i,
-            gf31.n_threads,
-            sizex, sizey, sizez, flops_per_matrix,
-            usec,flops_per_usec);
-
-#ifdef VERBOSE_DEBUGGING
-        TENSOR_DUMP("res",gf31.nodes[0])
-#endif
-
-        // Check that the matrix multiplication result is in the right ballpark
-        // We cannot use the exact value from the F32 multiplication because the quantizuation will be slightly different
-        float sum_of_Q4_result = tensor_sum_elements(gf31.nodes[0]);
-        float delta = abs(sum_of_Q4_result - sum_of_F32_reference);
-        float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; //  Let's accept an epsilon of 10^-6
-
-        if (delta > allowed_delta)  {
-            printf("\nABORT - ERROR in Matrix Multiplication result - expected %6.2f, got %6.2f (delta %6.2f > allowed_delta %6.2f)\n",
-                sum_of_F32_reference,
-                sum_of_Q4_result,
-                delta,
-                allowed_delta
-            );
-            exit(0);
-        }
-
-        // Running a different graph computation to make sure we override the CPU cache lines
-        ggml_graph_compute(ctx, &gf32);
-
-    }
-
-}
+#include <locale.h> 
+#include "ggml.h" 
+#include "build-info.h" 
+#include <assert.h> 
+#include <math.h> 
+#include <cstring> 
+#include <cstdio> 
+#include <cinttypes> 
+#include <unordered_map> 
+#include <queue> 
+#include <string.h> 
+#include <cassert> 
+#include <fstream> 
+#include <string> 
+#include <iterator> 
+#include <algorithm> 
+ 
+// Returns the sum of the first ne[0]*ne[1] values of an F32 tensor, or 0 for any other tensor type. 
+float tensor_sum_elements(struct ggml_tensor * tensor) { 
+    double sum = 0; // accumulate in double: adding this many floats in single precision loses digits 
+    if (tensor->type == GGML_TYPE_F32) { 
+        const float * values = (const float *) tensor->data; 
+        const int64_t count = (int64_t) tensor->ne[0] * (int64_t) tensor->ne[1]; 
+        for (int64_t idx = 0; idx < count; idx++) { 
+            sum += values[idx]; // same visiting order as row*ne[0]+col over all rows and columns 
+        } 
+    } 
+    return (float) sum; 
+} 
+ 
+/* 
+    The following ggml types have no entry in TENSOR_TYPE_AS_STR below and are printed as "UNKNOWN": 
+    GGML_TYPE_I8, 
+    GGML_TYPE_I16, 
+    GGML_TYPE_I32, 
+    GGML_TYPE_COUNT, 
+*/ 
+ 
+#define TENSOR_TYPE_AS_STR(TYPE) TYPE == GGML_TYPE_F32 ? "FP32" : TYPE == GGML_TYPE_F16 ? "FP16" : TYPE == GGML_TYPE_Q4_0 ? "Q4_0" : TYPE == GGML_TYPE_Q4_1 ? "Q4_1" : "UNKNOWN" 
+ 
+#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", #TENSOR, \ 
+        TENSOR->type,TENSOR_TYPE_AS_STR(TENSOR->type),\ 
+        (int) TENSOR->ne[0], (int) TENSOR->ne[1], (int) TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \ 
+    { float sum = tensor_sum_elements(TENSOR); printf("Sum of tensor %s is %6.2f\n",#TENSOR, sum); } 
+ 
+struct benchmark_params_struct { // command-line options of the benchmark, with their defaults 
+    int32_t n_threads     = 1; // set by -t/--threads; main copies it into each graph's n_threads 
+    int32_t n_iterations  = 10; // set by -i/--iter; upper bound of the timed loop in main 
+}; 
+ 
+// Writes the option summary to stderr; `params` supplies the default values shown in the text. 
+void print_usage(int /*argc*/, char ** argv, struct benchmark_params_struct params) { 
+    fprintf(stderr, 
+        "usage: %s [options]\n" "\n" "options:\n" 
+        "  -h, --help            show this help message and exit\n" 
+        "  -t N, --threads N     number of threads to use during computation (default: %d)\n" 
+        "  -i N, --iter N     number of iterations to use during computation (default: %d)\n" "\n", 
+        argv[0], params.n_threads, params.n_iterations); 
+} 
+ 
+// Benchmark driver: computes an F32 x F32 matrix product once as a reference, then repeatedly times the same 
+// product with the first operand quantized to Q4_0. Exit status is 0 on success, 1 on a failed init/check/option. 
+int main(int argc, char ** argv) { 
+    struct benchmark_params_struct benchmark_params; 
+ 
+    bool invalid_param = false; 
+    std::string arg; 
+    for (int i = 1; i < argc; i++) { 
+        arg = argv[i]; 
+ 
+        if (arg == "-t" || arg == "--threads") { 
+            if (++i >= argc) { 
+                invalid_param = true; 
+                break; 
+            } 
+            benchmark_params.n_threads = std::stoi(argv[i]); 
+        } else if (arg == "-i" || arg == "--iter") { 
+            if (++i >= argc) { 
+                invalid_param = true; 
+                break; 
+            } 
+            benchmark_params.n_iterations = std::stoi(argv[i]); 
+        } else if (arg == "-h" || arg == "--help") { 
+            print_usage(argc, argv, benchmark_params); 
+            exit(0); 
+        } 
+    } 
+    // Checked after the loop: `break` leaves the loop at once, so a check inside the loop body would never run. 
+    if (invalid_param) { 
+        fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str()); 
+        print_usage(argc, argv, benchmark_params); 
+        exit(1); 
+    } 
+    fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); 
+    printf("Starting Test\n"); 
+ 
+    // create the ggml context 
+    struct ggml_context * ctx; 
+    // Problem size: m11 and m12 are created with (sizex, sizey), m2 with (sizex, sizez). 
+    // VERBOSE_DEBUGGING is force-undefined right below, so the large configuration is always compiled. 
+ 
+#undef VERBOSE_DEBUGGING 
+#ifndef VERBOSE_DEBUGGING 
+    const int sizey = 4096; 
+    const int sizex = 11008; 
+    const int sizez = 128; 
+#else 
+    /* Working - let's increase size */ 
+    const int sizey = 1; 
+    const int sizex = (8*32); 
+    const int sizez = 1; 
+ 
+    /*const int sizey = 1; 
+    const int sizex = 3*(8*32); 
+    const int sizez = 1;*/ 
+#endif 
+ 
+    // Context budget: the three F32 inputs, the two Q4_0 copies, two F32 buffers marked "BLAS", 
+    // and a fixed 16 MiB on top. 
+    size_t ctx_size = 0; 
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); 
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); 
+    ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32); 
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_Q4_0); 
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_Q4_0); 
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS 
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS 
+    ctx_size += 1024*1024*16; 
+    // ctx_size is a size_t, so it is printed with %zu rather than %li. 
+    printf("Allocating Memory of size %zu bytes, %zu MB\n", ctx_size, (ctx_size/1024/1024)); 
+ 
+    struct ggml_init_params params = { 
+        /*.mem_size   =*/ ctx_size, 
+        /*.mem_buffer =*/ NULL, 
+        /* no_alloc   =*/ 0 
+    }; 
+ 
+    ctx = ggml_init(params); 
+    if (!ctx) { 
+        fprintf(stderr, "%s: ggml_init() failed\n", __func__); 
+        return 1; 
+    } 
+ 
+ 
+    printf("Creating new tensors\n"); 
+    // m11: F32 tensor filled with 1.0 
+    struct ggml_tensor * m11 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizey); 
+    ggml_set_f32(m11, 1.0f); 
+ 
+    // m12: F32 tensor filled with 1.5 (source of the second Q4_0 graph) 
+    struct ggml_tensor * m12 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizey); 
+    ggml_set_f32(m12, 1.5f); 
+ 
+    // m2: F32 tensor filled with 2.0, the second operand of every ggml_mul_mat call below 
+    struct ggml_tensor * m2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizez); 
+    ggml_set_f32(m2, 2.0f); 
+ 
+    printf("\n------ Test 1 - Matrix Mult via F32 code ------------------------------------------------------------------------------\n"); 
+    // F32 reference product 
+    struct ggml_tensor * m11xm2 = ggml_mul_mat(ctx, m11, m2); 
+ 
+    // Forward graph for the reference product 
+    struct ggml_cgraph gf = ggml_build_forward(m11xm2); 
+ 
+    gf.n_threads=benchmark_params.n_threads; 
+    printf("cgraph->n_threads=%i\n",gf.n_threads); 
+ 
+    TENSOR_DUMP(m11); 
+    TENSOR_DUMP(m2); 
+ 
+    ggml_graph_compute(ctx, &gf); 
+ 
+    TENSOR_DUMP(gf.nodes[0]); 
+ 
+    printf("\n------ Test 2 - Matrix Mult via Q4_0 code ------------------------------------------------------------------------------\n"); 
+ 
+    int32_t nelements = sizex*sizey; 
+    int32_t ne[2] = { sizex, sizey }; 
+    // 16-entry histogram buffer handed to both ggml_quantize_q4_0 calls 
+    std::vector<int64_t> hist_cur(1 << 4, 0); 
+ 
+    // Set up the benchmark matrices 
+    // q11: Q4_0 tensor filled by quantizing m11 
+    struct ggml_tensor * q11 = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, sizex, sizey); 
+    ggml_quantize_q4_0((const float *) m11->data, q11->data, nelements, ne[0], hist_cur.data()); 
+ 
+    // Set up the compute graph 
+    // q31 = q11 x m2, the product that is timed in the loop below 
+    struct ggml_tensor * q31 = ggml_mul_mat(ctx, q11, m2); 
+ 
+    // Forward graph for the timed product 
+    struct ggml_cgraph gf31 = ggml_build_forward(q31); 
+    gf31.n_threads=benchmark_params.n_threads; 
+ 
+    // Set up a second graph computation to make sure we override the CPU cache lines 
+    // q12: Q4_0 tensor filled by quantizing m12 
+    struct ggml_tensor * q12 = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, sizex, sizey); 
+    ggml_quantize_q4_0((const float *) m12->data, q12->data, nelements, ne[0], hist_cur.data()); 
+ 
+    // q32 = q12 x m2 
+    struct ggml_tensor * q32 = ggml_mul_mat(ctx, q12, m2); 
+ 
+    // Forward graph for the cache-overriding product 
+    struct ggml_cgraph gf32 = ggml_build_forward(q32); 
+    gf32.n_threads=benchmark_params.n_threads; 
+    printf("cgraph->n_threads=%i\n",gf31.n_threads); 
+ 
+    const int dimx = sizex; 
+    const int dimy = sizey; 
+    const int dimz = sizez; 
+    long long int flops_per_dot_product = dimy + dimy; 
+    long long int flops_per_matrix = flops_per_dot_product * dimx * dimz; 
+    printf("Matrix Multiplication of (%i,%i,%i) x (%i,%i,%i) - about %6.2f gFLOPS\n\n", sizex, sizey, 1, sizex, sizez, 1, 1.0f*flops_per_matrix / 1000 / 1000 / 1000); 
+ 
+ 
+    // Let's use the F32 result from above as a reference for the q4_0 multiplication 
+    float sum_of_F32_reference = tensor_sum_elements(gf.nodes[0]); 
+ 
+ 
+    printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; FLOPS_per_u_Second\n"); 
+    printf("==============================================================================================\n"); 
+ 
+    for (int i=0;i<benchmark_params.n_iterations ;i++) { 
+        // Time one evaluation of the Q4_0 graph 
+        long long int start = ggml_time_us(); 
+        //printf("Running ggml_graph_compute\n"); 
+        ggml_graph_compute(ctx, &gf31); 
+        long long int stop = ggml_time_us(); 
+        long long int usec = stop-start; 
+        float flops_per_usec = (1.0f*flops_per_matrix)/usec; 
+        printf("%9i;%8i;%6i;%6i;%6i;%15lli;%18lli;%19.2f\n", 
+            i, 
+            gf31.n_threads, 
+            sizex, sizey, sizez, flops_per_matrix, 
+            usec,flops_per_usec); 
+ 
+#ifdef VERBOSE_DEBUGGING 
+        TENSOR_DUMP(gf31.nodes[0]); 
+#endif 
+ 
+        // Check that the matrix multiplication result is in the right ballpark 
+        // We cannot use the exact value from the F32 multiplication because the quantization will be slightly different 
+        float sum_of_Q4_result = tensor_sum_elements(gf31.nodes[0]); 
+        float delta = fabsf(sum_of_Q4_result - sum_of_F32_reference); // fabsf: unqualified abs() may pick the int version and truncate 
+        float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; //  Let's accept an epsilon of 10^-6 
+        if (delta > allowed_delta) { 
+            printf("\nABORT - ERROR in Matrix Multiplication result - expected %6.2f, got %6.2f (delta %6.2f > allowed_delta %6.2f)\n", 
+                sum_of_F32_reference, 
+                sum_of_Q4_result, 
+                delta, 
+                allowed_delta 
+            ); 
+            ggml_free(ctx); 
+            return 1; // a failed check must be visible to the caller as a non-zero exit status 
+        } 
+ 
+        // Running a different graph computation to make sure we override the CPU cache lines 
+        ggml_graph_compute(ctx, &gf32); 
+    } 
+    ggml_free(ctx); 
+    return 0; 
+}