From 9ee7a60f826df2f1469bc1257a3754a734529358 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 3 Feb 2025 18:11:30 -0600 Subject: [PATCH 01/25] Add preliminary Circle CI configuration. --- .circleci/config.yml | 242 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000..c3001cbb84 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,242 @@ +version: 2.1 + +branches: + only: + - master + - dev + - amd + - circleci + +linux: # Docker using the Base Convenience Image + docker: + - image: 'cimg/base:2024.10' +macos: # macos executor running Xcode + macos: + xcode: 14.2.0 + +workflows: + build: + jobs: + # full testsuite (all tests + mixed datatype (gemm_nn only) + salt + SDE + OOT) + - build: + parameters: + os: linux + environment: + CC: 'gcc' + OOT: 1 + TEST: 'ALL' + SDE: 1 + THR: 'none' + CONF: 'x86_64' + PACKAGES: '' + # openmp build + - build: + parameters: + os: linux + environment: + CC: 'gcc' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'openmp' + CONF: 'auto' + PACKAGES: '' + # pthreads build + - build: + parameters: + os: linux + environment: + CC: 'gcc' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'pthreads' + CONF: 'auto' + PACKAGES: '' + # clang build + - build: + parameters: + os: linux + environment: + CC: 'clang' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'auto' + PACKAGES: '' + # macOS with system compiler (clang) + - build: + parameters: + os: macos + environment: + CC: 'clang' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'auto' + PACKAGES: '' + # cortexa15 build and fast testsuite (qemu) + - build: + parameters: + os: 'linux' + environment: + CC: 'arm-linux-gnueabihf-gcc' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'cortexa15' + PACKAGES: 'gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-arm 
-cpu cortex-a15 -L /usr/arm-linux-gnueabihf/' + # cortexa57 build and fast testsuite (qemu) + - build: + parameters: + os: linux + environment: + CC: 'aarch64-linux-gnu-gcc' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'cortexa57' + PACKAGES: 'gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + # Apple M1 (firestorm) build and fast testsuite (qemu) + - build: + parameters: + os: linux + environment: + CC: 'aarch64-linux-gnu-gcc' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'firestorm' + PACKAGES: 'gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + # armsve build and fast testsuite (qemu) + - build: + parameters: + os: linux + environment: + CC: 'aarch64-linux-gnu-gcc-10' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'firestorm' + PACKAGES: 'gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-aarch64 -cpu max,sve=true,sve512=true -L /usr/aarch64-linux-gnu/' + # arm64 build and fast testsuite (qemu) + # NOTE: This entry omits the -cpu flag so that while both NEON and SVE kernels + # are compiled, only NEON kernels will be tested. (h/t to RuQing Xu) + - build: + parameters: + os: linux + environment: + CC: 'aarch64-linux-gnu-gcc-10' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'arm64' + PACKAGES: 'gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + # The RISC-V targets require the qemu version available in jammy or newer. + # When CI is upgraded, the packages should be activated and do_script.sh + # cleaned up. 
+ # PACKAGES="qemu-user qemu-user-binfmt" + - build: + parameters: + os: linux + environment: + CC: 'riscv64-unknown-linux-gcc' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'rv64iv' + BLD: '--disable-shared' + LDFLAGS: '-static' + - build: + parameters: + os: linux + environment: + CC: 'riscv32-unknown-linux-gcc' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'rv32iv' + BLD: '--disable-shared' + LDFLAGS: '-static' + - build: + parameters: + os: linux + environment: + CC: 'clang' + OOT: 0 + TEST: 'FAST' + SDE: 0 + THR: 'none' + CONF: 'sifive_x280' + BLD: '--disable-shared' + LDFLAGS: '-static' + +jobs: + build: + parameters: + os: + type: executor + executor: << parameters.os >> + steps: + - checkout + - run: + name: Installing Dependencies + command: 'sudo apt-get update && sudo apt-get install -y clang make python3 $PACKAGES' + - run: + name: Configuring + command: | + export DIST_PATH=. + pwd + if [ $OOT -eq 1 ]; then export DIST_PATH=`pwd`; mkdir ../oot; cd ../oot; chmod -R a-w $DIST_PATH; fi + pwd + if [ "$CONF" = "rv64iv" ]; then + $DIST_PATH/travis/do_riscv.sh "$CONF"; + export CC=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-gcc; + export CXX=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-g++; + export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 -cpu rv64,vext_spec=v1.0,v=true,vlen=128 -B 0x100000"; + fi + if [ "$CONF" = "rv32iv" ]; then + $DIST_PATH/travis/do_riscv.sh "$CONF"; + export CC=$DIST_PATH/../toolchain/riscv/bin/riscv32-unknown-linux-gnu-gcc; + export CXX=$DIST_PATH/../toolchain/riscv/bin/riscv32-unknown-linux-gnu-g++; + export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv32 -cpu rv32,vext_spec=v1.0,v=true,vlen=128 -B 0x100000"; + fi + if [ "$CONF" = "sifive_x280" ]; then + $DIST_PATH/travis/do_riscv.sh "$CONF"; + export CC=$DIST_PATH/../toolchain/riscv/bin/clang; + export CXX=$DIST_PATH/../toolchain/riscv/bin/clang++; + export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 
-cpu rv64,vext_spec=v1.0,v=true,vlen=512 -B 0x100000"; + fi + $DIST_PATH/configure -p `pwd`/../install -t $THR $BLD CC=$CC $CONF + pwd + ls -l + $CC --version + $CC -v + - run: + name: Building + command: | + make -j2 + make install + - run: + name: Testing + command: | + if [ "$BLD" = "" ]; then $DIST_PATH/travis/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi + # Qemu SVE is failing sgemmt in some cases. Skip as this issue is not observed + # on real chip (A64fx). + if [ "$CONF" = "armsve" ]; then sed -i 's/.*\.*/0/' $DIST_PATH/testsuite/input.operations.fast; fi + if [ "$TEST" != "0" ]; then travis_wait 30 $DIST_PATH/travis/do_testsuite.sh; fi + if [ "$SDE" = "1" ]; then travis_wait 30 $DIST_PATH/travis/do_sde.sh; fi From 6bb85da52d4d224db05bb2a43013aeeb98417660 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 3 Feb 2025 18:13:31 -0600 Subject: [PATCH 02/25] Edit. --- .circleci/config.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c3001cbb84..8319c4e7e1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -20,7 +20,7 @@ workflows: # full testsuite (all tests + mixed datatype (gemm_nn only) + salt + SDE + OOT) - build: parameters: - os: linux + os: 'linux' environment: CC: 'gcc' OOT: 1 @@ -32,7 +32,7 @@ workflows: # openmp build - build: parameters: - os: linux + os: 'linux' environment: CC: 'gcc' OOT: 0 @@ -44,7 +44,7 @@ workflows: # pthreads build - build: parameters: - os: linux + os: 'linux' environment: CC: 'gcc' OOT: 0 @@ -56,7 +56,7 @@ workflows: # clang build - build: parameters: - os: linux + os: 'linux' environment: CC: 'clang' OOT: 0 @@ -68,7 +68,7 @@ workflows: # macOS with system compiler (clang) - build: parameters: - os: macos + os: 'macos' environment: CC: 'clang' OOT: 0 @@ -93,7 +93,7 @@ workflows: # cortexa57 build and fast testsuite (qemu) - build: parameters: - os: linux + os: 'linux' environment: CC: 'aarch64-linux-gnu-gcc' 
OOT: 0 @@ -106,7 +106,7 @@ workflows: # Apple M1 (firestorm) build and fast testsuite (qemu) - build: parameters: - os: linux + os: 'linux' environment: CC: 'aarch64-linux-gnu-gcc' OOT: 0 @@ -119,7 +119,7 @@ workflows: # armsve build and fast testsuite (qemu) - build: parameters: - os: linux + os: 'linux' environment: CC: 'aarch64-linux-gnu-gcc-10' OOT: 0 @@ -134,7 +134,7 @@ workflows: # are compiled, only NEON kernels will be tested. (h/t to RuQing Xu) - build: parameters: - os: linux + os: 'linux' environment: CC: 'aarch64-linux-gnu-gcc-10' OOT: 0 @@ -150,7 +150,7 @@ workflows: # PACKAGES="qemu-user qemu-user-binfmt" - build: parameters: - os: linux + os: 'linux' environment: CC: 'riscv64-unknown-linux-gcc' OOT: 0 @@ -162,7 +162,7 @@ workflows: LDFLAGS: '-static' - build: parameters: - os: linux + os: 'linux' environment: CC: 'riscv32-unknown-linux-gcc' OOT: 0 @@ -174,7 +174,7 @@ workflows: LDFLAGS: '-static' - build: parameters: - os: linux + os: 'linux' environment: CC: 'clang' OOT: 0 From a8ba9f53eb5d7cc6c37377e3b73280f65624a754 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 3 Feb 2025 18:19:49 -0600 Subject: [PATCH 03/25] Edit. --- .circleci/config.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8319c4e7e1..b6327337e8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,12 +7,13 @@ branches: - amd - circleci -linux: # Docker using the Base Convenience Image - docker: - - image: 'cimg/base:2024.10' -macos: # macos executor running Xcode - macos: - xcode: 14.2.0 +executors: + linux: # Docker using the Base Convenience Image + docker: + - image: 'cimg/base:2024.10' + macos: # macos executor running Xcode + macos: + xcode: 14.2.0 workflows: build: From dcc0e60312f5c0d30860aa62505e2ec781345cae Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 3 Feb 2025 18:45:25 -0600 Subject: [PATCH 04/25] Edit. 
--- .circleci/config.yml | 262 ++++++++++++++++++++----------------------- 1 file changed, 123 insertions(+), 139 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b6327337e8..676ce7084d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,192 +18,163 @@ executors: workflows: build: jobs: + # Default: + # - build: + # parameters: + # os: linux + # CC: gcc + # OOT: 0 + # TEST: FAST + # SDE: 0 + # THR: none + # CONF: auto + # BLD: '' + # LDFLAGS: '' + # TESTSUITE_WRAPPER: '' + # PACKAGES: '' + # full testsuite (all tests + mixed datatype (gemm_nn only) + salt + SDE + OOT) - build: - parameters: - os: 'linux' - environment: - CC: 'gcc' - OOT: 1 - TEST: 'ALL' - SDE: 1 - THR: 'none' - CONF: 'x86_64' - PACKAGES: '' + OOT: 1 + TEST: ALL + SDE: 1 + CONF: x86_64 + # openmp build - build: - parameters: - os: 'linux' - environment: - CC: 'gcc' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'openmp' - CONF: 'auto' - PACKAGES: '' + THR: openmp + # pthreads build - build: - parameters: - os: 'linux' - environment: - CC: 'gcc' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'pthreads' - CONF: 'auto' - PACKAGES: '' + THR: pthreads + # clang build - build: - parameters: - os: 'linux' - environment: - CC: 'clang' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'auto' - PACKAGES: '' + CC: clang + # macOS with system compiler (clang) - build: - parameters: - os: 'macos' - environment: - CC: 'clang' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'auto' - PACKAGES: '' + os: macos + CC: clang + # cortexa15 build and fast testsuite (qemu) - build: - parameters: - os: 'linux' - environment: - CC: 'arm-linux-gnueabihf-gcc' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'cortexa15' - PACKAGES: 'gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-system-arm qemu-user' - TESTSUITE_WRAPPER: 'qemu-arm -cpu cortex-a15 -L /usr/arm-linux-gnueabihf/' + CC: arm-linux-gnueabihf-gcc + CONF: cortexa15 + PACKAGES: 
'gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-arm -cpu cortex-a15 -L /usr/arm-linux-gnueabihf/' + # cortexa57 build and fast testsuite (qemu) - build: - parameters: - os: 'linux' - environment: - CC: 'aarch64-linux-gnu-gcc' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'cortexa57' - PACKAGES: 'gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' - TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + CC: aarch64-linux-gnu-gcc + CONF: cortexa57 + PACKAGES: 'gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + # Apple M1 (firestorm) build and fast testsuite (qemu) - build: - parameters: - os: 'linux' - environment: - CC: 'aarch64-linux-gnu-gcc' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'firestorm' - PACKAGES: 'gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' - TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + CC: aarch64-linux-gnu-gcc + CONF: firestorm + PACKAGES: 'gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + # armsve build and fast testsuite (qemu) - build: - parameters: - os: 'linux' - environment: - CC: 'aarch64-linux-gnu-gcc-10' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'firestorm' - PACKAGES: 'gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' - TESTSUITE_WRAPPER: 'qemu-aarch64 -cpu max,sve=true,sve512=true -L /usr/aarch64-linux-gnu/' + CC: aarch64-linux-gnu-gcc-10 + CONF: armsve + PACKAGES: 'gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-aarch64 -cpu max,sve=true,sve512=true -L /usr/aarch64-linux-gnu/' + # 
arm64 build and fast testsuite (qemu) # NOTE: This entry omits the -cpu flag so that while both NEON and SVE kernels # are compiled, only NEON kernels will be tested. (h/t to RuQing Xu) - build: - parameters: - os: 'linux' - environment: - CC: 'aarch64-linux-gnu-gcc-10' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'arm64' - PACKAGES: 'gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' - TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + CC: aarch64-linux-gnu-gcc-10 + CONF: arm64 + PACKAGES: 'gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' + TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' + # The RISC-V targets require the qemu version available in jammy or newer. # When CI is upgraded, the packages should be activated and do_script.sh # cleaned up. # PACKAGES="qemu-user qemu-user-binfmt" - build: - parameters: - os: 'linux' - environment: - CC: 'riscv64-unknown-linux-gcc' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'rv64iv' - BLD: '--disable-shared' - LDFLAGS: '-static' + CC: riscv64-unknown-linux-gcc + CONF: rv64iv + BLD: --disable-shared + LDFLAGS: -static - build: - parameters: - os: 'linux' - environment: - CC: 'riscv32-unknown-linux-gcc' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'rv32iv' - BLD: '--disable-shared' - LDFLAGS: '-static' + CC: riscv32-unknown-linux-gcc + CONF: rv32iv + BLD: --disable-shared + LDFLAGS: -static - build: - parameters: - os: 'linux' - environment: - CC: 'clang' - OOT: 0 - TEST: 'FAST' - SDE: 0 - THR: 'none' - CONF: 'sifive_x280' - BLD: '--disable-shared' - LDFLAGS: '-static' + CC: clang + CONF: sifive_x280 + BLD: --disable-shared + LDFLAGS: -static jobs: build: parameters: os: type: executor + default: linux + CC: + type: string + default: gcc + OOT: + type: integer + default: 0 + TEST: + type: string + default: FAST + SDE: + type: integer + default: 0 + THR: + type: string + 
default: none + CONF: + type: string + default: auto + BLD: + type: string + default: '' + LDFLAGS: + type: string + default: '' + TESTSUITE_WRAPPER: + type: string + default: '' + PACKAGES: + type: string + default: '' executor: << parameters.os >> steps: - checkout - run: name: Installing Dependencies - command: 'sudo apt-get update && sudo apt-get install -y clang make python3 $PACKAGES' + command: + sudo apt-get update && sudo apt-get install -y clang make python3 << parameters.PACKAGES >> - run: name: Configuring command: | export DIST_PATH=. + export CC=<< parameters.CC >> + export OOT=<< parameters.OOT >> + export CONF=<< parameters.CONF >> + export TEST=<< parameters.TEST >> + export BLD=<< parameters.BLD >> + export LDFLAGS=<< parameters.LDFLAGS >> + export SDE=<< parameters.SDE >> + export THR=<< parameters.THR >> + export TESTSUITE_WRAPPER=<< parameters.TESTSUITE_WRAPPER >> + pwd if [ $OOT -eq 1 ]; then export DIST_PATH=`pwd`; mkdir ../oot; cd ../oot; chmod -R a-w $DIST_PATH; fi pwd + if [ "$CONF" = "rv64iv" ]; then $DIST_PATH/travis/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-gcc; @@ -222,6 +193,19 @@ jobs: export CXX=$DIST_PATH/../toolchain/riscv/bin/clang++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 -cpu rv64,vext_spec=v1.0,v=true,vlen=512 -B 0x100000"; fi + + echo "Configuration:" + echo "CC = $CC" + echo "OOT = $OOT" + echo "CONF = $CONF" + echo "THR = $THR" + echo "TEST = $TEST" + echo "BLD = $BLD" + echo "SDE = $SDE" + echo "DIST_PATH = $DIST_PATH" + echo "LDFLAGS = $LDFLAGS" + echo "TESTSUITE_WRAPPER = $TESTSUITE_WRAPPER" + $DIST_PATH/configure -p `pwd`/../install -t $THR $BLD CC=$CC $CONF pwd ls -l From 3a8b3f32e65c95b42c61b04f433a33169bd0aeb3 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 3 Feb 2025 18:51:05 -0600 Subject: [PATCH 05/25] Edit. 
--- .circleci/config.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 676ce7084d..0da2e3a423 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -158,7 +158,7 @@ jobs: command: sudo apt-get update && sudo apt-get install -y clang make python3 << parameters.PACKAGES >> - run: - name: Configuring + name: Configuring, Building, Testing command: | export DIST_PATH=. export CC=<< parameters.CC >> @@ -211,14 +211,10 @@ jobs: ls -l $CC --version $CC -v - - run: - name: Building - command: | + make -j2 make install - - run: - name: Testing - command: | + if [ "$BLD" = "" ]; then $DIST_PATH/travis/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi # Qemu SVE is failing sgemmt in some cases. Skip as this issue is not observed # on real chip (A64fx). From cd9d6961769103909c856ed65afd736e45c600c9 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 3 Feb 2025 18:57:21 -0600 Subject: [PATCH 06/25] Edit. --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0da2e3a423..04ed164150 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -219,5 +219,5 @@ jobs: # Qemu SVE is failing sgemmt in some cases. Skip as this issue is not observed # on real chip (A64fx). if [ "$CONF" = "armsve" ]; then sed -i 's/.*\.*/0/' $DIST_PATH/testsuite/input.operations.fast; fi - if [ "$TEST" != "0" ]; then travis_wait 30 $DIST_PATH/travis/do_testsuite.sh; fi - if [ "$SDE" = "1" ]; then travis_wait 30 $DIST_PATH/travis/do_sde.sh; fi + if [ "$TEST" != "0" ]; then $DIST_PATH/travis/do_testsuite.sh; fi + if [ "$SDE" = "1" ]; then $DIST_PATH/travis/do_sde.sh; fi From 0401e232a6473887eaba0826848f7d4245c52a62 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Tue, 4 Feb 2025 10:35:56 -0600 Subject: [PATCH 07/25] Edit. 
--- .circleci/config.yml | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 04ed164150..1c29820710 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ executors: linux: # Docker using the Base Convenience Image docker: - image: 'cimg/base:2024.10' - macos: # macos executor running Xcode + macos: &macos-executor # macos executor running Xcode macos: xcode: 14.2.0 @@ -153,23 +153,30 @@ jobs: executor: << parameters.os >> steps: - checkout - - run: - name: Installing Dependencies - command: - sudo apt-get update && sudo apt-get install -y clang make python3 << parameters.PACKAGES >> + + - when: + condition: + not: + equal: [ *macos-executor, << parameters.os >> ] + steps: + - run: + name: Installing Dependencies + command: + sudo apt-get update && sudo apt-get install -y clang make python3 << parameters.PACKAGES >> + - run: name: Configuring, Building, Testing command: | export DIST_PATH=. - export CC=<< parameters.CC >> - export OOT=<< parameters.OOT >> - export CONF=<< parameters.CONF >> - export TEST=<< parameters.TEST >> - export BLD=<< parameters.BLD >> - export LDFLAGS=<< parameters.LDFLAGS >> - export SDE=<< parameters.SDE >> - export THR=<< parameters.THR >> - export TESTSUITE_WRAPPER=<< parameters.TESTSUITE_WRAPPER >> + export CC="<< parameters.CC >>" + export OOT="<< parameters.OOT >>" + export CONF="<< parameters.CONF >>" + export TEST="<< parameters.TEST >>" + export BLD="<< parameters.BLD >>" + export LDFLAGS="<< parameters.LDFLAGS >>" + export SDE="<< parameters.SDE >>" + export THR="<< parameters.THR >>" + export TESTSUITE_WRAPPER="<< parameters.TESTSUITE_WRAPPER >>" pwd if [ $OOT -eq 1 ]; then export DIST_PATH=`pwd`; mkdir ../oot; cd ../oot; chmod -R a-w $DIST_PATH; fi From dc955435b662a360f15c43c3d8088da448c5c303 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 5 Feb 2025 11:46:39 -0600 Subject: [PATCH 08/25] Edit. 
--- .circleci/config.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1c29820710..f8f7965dd5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -51,15 +51,18 @@ workflows: # clang build - build: CC: clang + CXX: clang++ # macOS with system compiler (clang) - build: os: macos CC: clang + CXX: clang++ # cortexa15 build and fast testsuite (qemu) - build: CC: arm-linux-gnueabihf-gcc + CXX: arm-linux-gnueabihf-g++ CONF: cortexa15 PACKAGES: 'gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-system-arm qemu-user' TESTSUITE_WRAPPER: 'qemu-arm -cpu cortex-a15 -L /usr/arm-linux-gnueabihf/' @@ -67,6 +70,7 @@ workflows: # cortexa57 build and fast testsuite (qemu) - build: CC: aarch64-linux-gnu-gcc + CXX: aarch64-linux-gnu-g++ CONF: cortexa57 PACKAGES: 'gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' @@ -74,6 +78,7 @@ workflows: # Apple M1 (firestorm) build and fast testsuite (qemu) - build: CC: aarch64-linux-gnu-gcc + CXX: aarch64-linux-gnu-g++ CONF: firestorm PACKAGES: 'gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' @@ -81,6 +86,7 @@ workflows: # armsve build and fast testsuite (qemu) - build: CC: aarch64-linux-gnu-gcc-10 + CXX: aarch64-linux-gnu-g++-10 CONF: armsve PACKAGES: 'gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' TESTSUITE_WRAPPER: 'qemu-aarch64 -cpu max,sve=true,sve512=true -L /usr/aarch64-linux-gnu/' @@ -90,6 +96,7 @@ workflows: # are compiled, only NEON kernels will be tested. 
(h/t to RuQing Xu) - build: CC: aarch64-linux-gnu-gcc-10 + CXX: aarch64-linux-gnu-g++-10 CONF: arm64 PACKAGES: 'gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user' TESTSUITE_WRAPPER: 'qemu-aarch64 -L /usr/aarch64-linux-gnu/' @@ -99,17 +106,14 @@ workflows: # cleaned up. # PACKAGES="qemu-user qemu-user-binfmt" - build: - CC: riscv64-unknown-linux-gcc CONF: rv64iv BLD: --disable-shared LDFLAGS: -static - build: - CC: riscv32-unknown-linux-gcc CONF: rv32iv BLD: --disable-shared LDFLAGS: -static - build: - CC: clang CONF: sifive_x280 BLD: --disable-shared LDFLAGS: -static @@ -123,6 +127,9 @@ jobs: CC: type: string default: gcc + CXX: + type: string + default: g++ OOT: type: integer default: 0 @@ -169,6 +176,7 @@ jobs: command: | export DIST_PATH=. export CC="<< parameters.CC >>" + export CXX="<< parameters.CXX >>" export OOT="<< parameters.OOT >>" export CONF="<< parameters.CONF >>" export TEST="<< parameters.TEST >>" @@ -203,6 +211,7 @@ jobs: echo "Configuration:" echo "CC = $CC" + echo "CXX = $CXX" echo "OOT = $OOT" echo "CONF = $CONF" echo "THR = $THR" @@ -222,7 +231,7 @@ jobs: make -j2 make install - if [ "$BLD" = "" ]; then $DIST_PATH/travis/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi + if [ "$BLD" = "" ] && [ "$TESTSUITE_WRAPPER" = "" ] ; then $DIST_PATH/travis/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi # Qemu SVE is failing sgemmt in some cases. Skip as this issue is not observed # on real chip (A64fx). if [ "$CONF" = "armsve" ]; then sed -i 's/.*\.*/0/' $DIST_PATH/testsuite/input.operations.fast; fi From ca04633d609c0066cf29cc4f980d02afecc0ea4e Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 5 Feb 2025 11:50:28 -0600 Subject: [PATCH 09/25] Increase the max size for stack buffers. Details: - See #850 for details on the problem. - This is a temporary fix which should work for sdcz data types. - Altra architectures may still not fully work for MP/MD as the stack buffer size is hard-coded. 
--- docs/ConfigurationHowTo.md | 4 ++-- frame/base/bli_check.c | 4 +++- frame/include/bli_kernel_macro_defs.h | 7 ++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/ConfigurationHowTo.md b/docs/ConfigurationHowTo.md index 9217ae9fdc..0d4fa4cac3 100644 --- a/docs/ConfigurationHowTo.md +++ b/docs/ConfigurationHowTo.md @@ -215,9 +215,9 @@ _**SIMD register file.**_ BLIS allows you to specify the _maximum_ number of SIM #define BLIS_SIMD_MAX_NUM_REGISTERS 32 #define BLIS_SIMD_MAX_SIZE 64 ``` -These macros are used in computing the maximum amount of temporary storage (typically allocated statically, on the function stack) that will be needed to hold a single micro-tile of any datatype (and for any induced method): +These macros are used in computing the maximum amount of temporary storage (typically allocated statically, on the function stack) that will be needed to hold a single micro-tile of any datatype (and for any induced method or mixed-precision computation): ```c -#define BLIS_STACK_BUF_MAX_SIZE ( BLIS_SIMD_MAX_NUM_REGISTERS * BLIS_SIMD_MAX_SIZE * 2 ) +#define BLIS_STACK_BUF_MAX_SIZE ( BLIS_SIMD_MAX_NUM_REGISTERS * BLIS_SIMD_MAX_SIZE * 4 ) ``` These temporary buffers are used when handling edge cases (m % _MR_ != 0 || n % _NR_ != 0) within the level-3 macrokernels, and also in the virtual microkernels of various implementations of induced methods for complex matrix multiplication. It is **very important** that these values be set correctly; otherwise, you may experience undefined behavior as stack data is overwritten at run-time. A kernel developer may set `BLIS_SIMD_MAX_NUM_REGISTERS` and `BLIS_SIMD_MAX_SIZE`, which will indirectly affect `BLIS_STACK_BUF_MAX_SIZE`, or he may set `BLIS_STACK_BUF_MAX_SIZE` directly. Notice that the default values are already set to work with modern x86_64 systems. 
diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index e949b6361e..39cefd62ab 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -826,7 +826,9 @@ err_t bli_check_sufficient_stack_buf_size( const cntx_t* cntx ) { dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); - siz_t dt_size = bli_dt_size( dt ); + // Always use the size of the largest data type to account for + // conversions during mixed-domain/mixed-precision computation. + siz_t dt_size = bli_dt_size( BLIS_DCOMPLEX ); // NOTE: For induced methods, we use the size of the complex datatypes // (rather than the size of the native micro-kernels' datatype) because diff --git a/frame/include/bli_kernel_macro_defs.h b/frame/include/bli_kernel_macro_defs.h index 8c0f1cb143..f1d906c125 100644 --- a/frame/include/bli_kernel_macro_defs.h +++ b/frame/include/bli_kernel_macro_defs.h @@ -183,14 +183,15 @@ // The maximum size in bytes of local stack buffers within macro-kernel // functions. These buffers are usually used to store a temporary copy -// of a single microtile. The reason we multiply by 2 is to handle induced +// of a single microtile. The reason we multiply by 4 is to handle induced // methods, where we use real domain register blocksizes in units of -// complex elements. Specifically, the macro-kernels will need this larger +// complex elements, as well as mixed-precision, where data may be +// converted to a wider type. Specifically, the macro-kernels will need this larger // micro-tile footprint, even though the virtual micro-kernels will only // ever be writing to half (real or imaginary part) at a time. 
#ifndef BLIS_STACK_BUF_MAX_SIZE #define BLIS_STACK_BUF_MAX_SIZE ( BLIS_SIMD_MAX_NUM_REGISTERS * \ - BLIS_SIMD_MAX_SIZE * 2 ) + BLIS_SIMD_MAX_SIZE * 4 ) #endif // Alignment size used to align local stack buffers within macro-kernel From 1de8a59c250416df31ef90369ac6760c0b1ae46a Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 5 Feb 2025 13:12:18 -0600 Subject: [PATCH 10/25] Edit. --- .circleci/config.yml | 12 ++++++------ {travis => ci}/cpuid/excavator.def | 0 {travis => ci}/cpuid/haswell.def | 0 {travis => ci}/cpuid/penryn.def | 0 {travis => ci}/cpuid/piledriver.def | 0 {travis => ci}/cpuid/sandybridge.def | 0 {travis => ci}/cpuid/skx.def | 0 {travis => ci}/cpuid/skx1.def | 0 {travis => ci}/cpuid/steamroller.def | 0 {travis => ci}/cpuid/zen.def | 0 {travis => ci}/cpuid/zen2.def | 0 {travis => ci}/cpuid/zen3.def | 0 {travis => ci}/cxx/Makefile | 0 {travis => ci}/cxx/cxx-test.cxx | 0 {travis => ci}/cxx/cxx-test.sh | 4 ++-- {travis => ci}/do_riscv.sh | 0 {travis => ci}/do_sde.sh | 4 ++-- {travis => ci}/do_testsuite.sh | 15 ++++++++++----- {travis => ci}/patch-ld-so.py | 0 19 files changed, 20 insertions(+), 15 deletions(-) rename {travis => ci}/cpuid/excavator.def (100%) rename {travis => ci}/cpuid/haswell.def (100%) rename {travis => ci}/cpuid/penryn.def (100%) rename {travis => ci}/cpuid/piledriver.def (100%) rename {travis => ci}/cpuid/sandybridge.def (100%) rename {travis => ci}/cpuid/skx.def (100%) rename {travis => ci}/cpuid/skx1.def (100%) rename {travis => ci}/cpuid/steamroller.def (100%) rename {travis => ci}/cpuid/zen.def (100%) rename {travis => ci}/cpuid/zen2.def (100%) rename {travis => ci}/cpuid/zen3.def (100%) rename {travis => ci}/cxx/Makefile (100%) rename {travis => ci}/cxx/cxx-test.cxx (100%) rename {travis => ci}/cxx/cxx-test.sh (93%) rename {travis => ci}/do_riscv.sh (100%) rename {travis => ci}/do_sde.sh (93%) rename {travis => ci}/do_testsuite.sh (76%) rename {travis => ci}/patch-ld-so.py (100%) diff --git a/.circleci/config.yml 
b/.circleci/config.yml index f8f7965dd5..89019dbfa1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -191,19 +191,19 @@ jobs: pwd if [ "$CONF" = "rv64iv" ]; then - $DIST_PATH/travis/do_riscv.sh "$CONF"; + $DIST_PATH/ci/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-gcc; export CXX=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-g++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 -cpu rv64,vext_spec=v1.0,v=true,vlen=128 -B 0x100000"; fi if [ "$CONF" = "rv32iv" ]; then - $DIST_PATH/travis/do_riscv.sh "$CONF"; + $DIST_PATH/ci/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/riscv32-unknown-linux-gnu-gcc; export CXX=$DIST_PATH/../toolchain/riscv/bin/riscv32-unknown-linux-gnu-g++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv32 -cpu rv32,vext_spec=v1.0,v=true,vlen=128 -B 0x100000"; fi if [ "$CONF" = "sifive_x280" ]; then - $DIST_PATH/travis/do_riscv.sh "$CONF"; + $DIST_PATH/ci/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/clang; export CXX=$DIST_PATH/../toolchain/riscv/bin/clang++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 -cpu rv64,vext_spec=v1.0,v=true,vlen=512 -B 0x100000"; @@ -231,9 +231,9 @@ jobs: make -j2 make install - if [ "$BLD" = "" ] && [ "$TESTSUITE_WRAPPER" = "" ] ; then $DIST_PATH/travis/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi + if [ "$BLD" = "" ] && [ "$TESTSUITE_WRAPPER" = "" ] ; then $DIST_PATH/ci/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi # Qemu SVE is failing sgemmt in some cases. Skip as this issue is not observed # on real chip (A64fx). 
if [ "$CONF" = "armsve" ]; then sed -i 's/.*\<gemmt\>.*/0/' $DIST_PATH/testsuite/input.operations.fast; fi - if [ "$TEST" != "0" ]; then $DIST_PATH/travis/do_testsuite.sh; fi - if [ "$SDE" = "1" ]; then $DIST_PATH/travis/do_sde.sh; fi + if [ "$TEST" != "0" ]; then $DIST_PATH/ci/do_testsuite.sh; fi + if [ "$SDE" = "1" ]; then $DIST_PATH/ci/do_sde.sh; fi diff --git a/travis/cpuid/excavator.def b/ci/cpuid/excavator.def similarity index 100% rename from travis/cpuid/excavator.def rename to ci/cpuid/excavator.def diff --git a/travis/cpuid/haswell.def b/ci/cpuid/haswell.def similarity index 100% rename from travis/cpuid/haswell.def rename to ci/cpuid/haswell.def diff --git a/travis/cpuid/penryn.def b/ci/cpuid/penryn.def similarity index 100% rename from travis/cpuid/penryn.def rename to ci/cpuid/penryn.def diff --git a/travis/cpuid/piledriver.def b/ci/cpuid/piledriver.def similarity index 100% rename from travis/cpuid/piledriver.def rename to ci/cpuid/piledriver.def diff --git a/travis/cpuid/sandybridge.def b/ci/cpuid/sandybridge.def similarity index 100% rename from travis/cpuid/sandybridge.def rename to ci/cpuid/sandybridge.def diff --git a/travis/cpuid/skx.def b/ci/cpuid/skx.def similarity index 100% rename from travis/cpuid/skx.def rename to ci/cpuid/skx.def diff --git a/travis/cpuid/skx1.def b/ci/cpuid/skx1.def similarity index 100% rename from travis/cpuid/skx1.def rename to ci/cpuid/skx1.def diff --git a/travis/cpuid/steamroller.def b/ci/cpuid/steamroller.def similarity index 100% rename from travis/cpuid/steamroller.def rename to ci/cpuid/steamroller.def diff --git a/travis/cpuid/zen.def b/ci/cpuid/zen.def similarity index 100% rename from travis/cpuid/zen.def rename to ci/cpuid/zen.def diff --git a/travis/cpuid/zen2.def b/ci/cpuid/zen2.def similarity index 100% rename from travis/cpuid/zen2.def rename to ci/cpuid/zen2.def diff --git a/travis/cpuid/zen3.def b/ci/cpuid/zen3.def similarity index 100% rename from travis/cpuid/zen3.def rename to ci/cpuid/zen3.def diff --git
a/travis/cxx/Makefile b/ci/cxx/Makefile similarity index 100% rename from travis/cxx/Makefile rename to ci/cxx/Makefile diff --git a/travis/cxx/cxx-test.cxx b/ci/cxx/cxx-test.cxx similarity index 100% rename from travis/cxx/cxx-test.cxx rename to ci/cxx/cxx-test.cxx diff --git a/travis/cxx/cxx-test.sh b/ci/cxx/cxx-test.sh similarity index 93% rename from travis/cxx/cxx-test.sh rename to ci/cxx/cxx-test.sh index c0036611f4..52402867d7 100755 --- a/travis/cxx/cxx-test.sh +++ b/ci/cxx/cxx-test.sh @@ -50,9 +50,9 @@ if [ ! -e $INCLUDE_DIR/blis.h ]; then exit 1 fi -if [ ! -e $SOURCE_DIR/travis/cxx/Makefile ]; then +if [ ! -e $SOURCE_DIR/ci/cxx/Makefile ]; then echo "could not find cxx-test Makefile" exit 1 fi -make -C $SOURCE_DIR/travis/cxx INCLUDE_DIR=$INCLUDE_DIR LIB_DIR=$LIB_DIR BUILD_DIR=$BUILD_DIR +make -C $SOURCE_DIR/ci/cxx INCLUDE_DIR=$INCLUDE_DIR LIB_DIR=$LIB_DIR BUILD_DIR=$BUILD_DIR diff --git a/travis/do_riscv.sh b/ci/do_riscv.sh similarity index 100% rename from travis/do_riscv.sh rename to ci/do_riscv.sh diff --git a/travis/do_sde.sh b/ci/do_sde.sh similarity index 93% rename from travis/do_sde.sh rename to ci/do_sde.sh index 4f0447778a..c713a59a92 100755 --- a/travis/do_sde.sh +++ b/ci/do_sde.sh @@ -38,7 +38,7 @@ LIBC_SO=${TMP%% *} TMP=`ldd ./test_libblis.x | grep libm | sed 's/^.*=> //'` LIBM_SO=${TMP%% *} for LIB in $LD_SO $LIBC_SO $LIBM_SO; do - $DIST_PATH/travis/patch-ld-so.py $LIB .tmp + $DIST_PATH/ci/patch-ld-so.py $LIB .tmp chmod a+x .tmp sudo mv .tmp $LIB done @@ -47,7 +47,7 @@ for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator if [ "$ARCH" = "knl" ]; then TESTSUITE_WRAPPER="$SDE -knl --" else - TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/travis/cpuid/$ARCH.def --" + TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/ci/cpuid/$ARCH.def --" fi make TESTSUITE_WRAPPER="$TESTSUITE_WRAPPER" check diff --git a/travis/do_testsuite.sh b/ci/do_testsuite.sh similarity index 76% rename from travis/do_testsuite.sh rename to 
ci/do_testsuite.sh index c21df3a32f..aa72d80519 100755 --- a/travis/do_testsuite.sh +++ b/ci/do_testsuite.sh @@ -9,27 +9,32 @@ export BLIS_JR_NT=1 export BLIS_IR_NT=1 if [ "$TEST" = "FAST" -o "$TEST" = "ALL" ]; then - make testblis-fast || cat ./output.testsuite + make testblis-fast + cat ./output.testsuite $DIST_PATH/testsuite/check-blistest.sh ./output.testsuite fi if [ "$TEST" = "MD" -o "$TEST" = "ALL" ]; then - make testblis-md || cat ./output.testsuite + make testblis-md + cat ./output.testsuite $DIST_PATH/testsuite/check-blistest.sh ./output.testsuite fi if [ "$TEST" = "SALT" -o "$TEST" = "ALL" ]; then # Disable multithreading within BLIS. export BLIS_JC_NT=1 BLIS_IC_NT=1 BLIS_JR_NT=1 BLIS_IR_NT=1 - make testblis-salt || cat ./output.testsuite + make testblis-salt + cat ./output.testsuite $DIST_PATH/testsuite/check-blistest.sh ./output.testsuite fi if [ "$TEST" = "1" -o "$TEST" = "ALL" ]; then - make testblis || cat ./output.testsuite + make testblis + cat ./output.testsuite $DIST_PATH/testsuite/check-blistest.sh ./output.testsuite fi -make testblas || cat ./output.testsuite +make testblas +cat ./output.testsuite $DIST_PATH/blastest/check-blastest.sh diff --git a/travis/patch-ld-so.py b/ci/patch-ld-so.py similarity index 100% rename from travis/patch-ld-so.py rename to ci/patch-ld-so.py From c67331f8ef636b535a66b90bc2447c84f29ec638 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 11:56:42 -0600 Subject: [PATCH 11/25] Edit. 
--- .circleci/config.yml | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 89019dbfa1..4985af715b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -33,10 +33,17 @@ workflows: # TESTSUITE_WRAPPER: '' # PACKAGES: '' - # full testsuite (all tests + mixed datatype (gemm_nn only) + salt + SDE + OOT) + # full testsuite (all tests + mixed datatype (gemm_nn only) + salt + OOT) - build: OOT: 1 TEST: ALL + SDE: 0 + CONF: x86_64 + + # SDE testing for x86_64 + - build: + OOT: 0 + TEST: FAST SDE: 1 CONF: x86_64 @@ -48,10 +55,14 @@ workflows: - build: THR: pthreads + # + # There is currently an undiagnosed test failure (see #852). + # This test is disabled until the failure can be resolved. + # # clang build - - build: - CC: clang - CXX: clang++ + #- build: + # CC: clang + # CXX: clang++ # macOS with system compiler (clang) - build: From 11478c08ffe936c5e0081af9f319e7064224e6f8 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 13:36:03 -0600 Subject: [PATCH 12/25] Edit. --- ci/do_sde.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/do_sde.sh b/ci/do_sde.sh index c713a59a92..157d240f66 100755 --- a/ci/do_sde.sh +++ b/ci/do_sde.sh @@ -43,7 +43,8 @@ for LIB in $LD_SO $LIBC_SO $LIBM_SO; do sudo mv .tmp $LIB done -for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do +for ARCH in sandybridge haswell skx knl piledriver steamroller excavator zen; do +#for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do if [ "$ARCH" = "knl" ]; then TESTSUITE_WRAPPER="$SDE -knl --" else From c1910fddd2679931f65969b5de55105aa86afa7b Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 13:43:24 -0600 Subject: [PATCH 13/25] Edit. 
--- .circleci/config.yml | 4 ++++ ci/do_sde.sh | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4985af715b..3116f9cdce 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,6 +14,9 @@ executors: macos: &macos-executor # macos executor running Xcode macos: xcode: 14.2.0 + linuxvm: # executor type + machine: + image: ubuntu-2204:current workflows: build: @@ -42,6 +45,7 @@ workflows: # SDE testing for x86_64 - build: + os: linuxvm OOT: 0 TEST: FAST SDE: 1 diff --git a/ci/do_sde.sh b/ci/do_sde.sh index 157d240f66..c713a59a92 100755 --- a/ci/do_sde.sh +++ b/ci/do_sde.sh @@ -43,8 +43,7 @@ for LIB in $LD_SO $LIBC_SO $LIBM_SO; do sudo mv .tmp $LIB done -for ARCH in sandybridge haswell skx knl piledriver steamroller excavator zen; do -#for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do +for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do if [ "$ARCH" = "knl" ]; then TESTSUITE_WRAPPER="$SDE -knl --" else From 6ec727cd169bf09a598461b6ffd640381aaad60a Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 13:56:28 -0600 Subject: [PATCH 14/25] Edit. --- .circleci/config.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3116f9cdce..480133c29d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,7 +10,10 @@ branches: executors: linux: # Docker using the Base Convenience Image docker: - - image: 'cimg/base:2024.10' + - image: cimg/base:2024.10 + linuxnew: # Docker using the Base Convenience Image + docker: + - image: cimg/base:current-22.04 macos: &macos-executor # macos executor running Xcode macos: xcode: 14.2.0 @@ -64,9 +67,10 @@ workflows: # This test is disabled until the failure can be resolved. 
# # clang build - #- build: - # CC: clang - # CXX: clang++ + - build: + os: linuxnew + CC: clang + CXX: clang++ # macOS with system compiler (clang) - build: @@ -184,7 +188,7 @@ jobs: - run: name: Installing Dependencies command: - sudo apt-get update && sudo apt-get install -y clang make python3 << parameters.PACKAGES >> + sudo apt-get update && sudo NEEDRESTART_MODE=a apt-get install -y clang make python3 << parameters.PACKAGES >> - run: name: Configuring, Building, Testing From 52c1f92cb108fcef9183c5681398662c80f5d955 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 15:05:51 -0600 Subject: [PATCH 15/25] Edit. --- .circleci/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 480133c29d..40f4e80274 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -68,9 +68,9 @@ workflows: # # clang build - build: - os: linuxnew - CC: clang - CXX: clang++ + CC: clang-15 + CXX: clang++-15 + PACKAGES: clang-15 # macOS with system compiler (clang) - build: @@ -188,7 +188,7 @@ jobs: - run: name: Installing Dependencies command: - sudo apt-get update && sudo NEEDRESTART_MODE=a apt-get install -y clang make python3 << parameters.PACKAGES >> + sudo apt-get update && sudo NEEDRESTART_MODE=a apt-get install -y make python3 << parameters.PACKAGES >> - run: name: Configuring, Building, Testing From f36a58241c7e0dc773d8989453351a9bceac45bc Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 19:53:13 -0600 Subject: [PATCH 16/25] Edit. --- .circleci/config.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 40f4e80274..bf0ff226c9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -62,15 +62,11 @@ workflows: - build: THR: pthreads - # - # There is currently an undiagnosed test failure (see #852). - # This test is disabled until the failure can be resolved. 
- # # clang build - build: - CC: clang-15 - CXX: clang++-15 - PACKAGES: clang-15 + CC: clang + CXX: clang++ + PACKAGES: clang # macOS with system compiler (clang) - build: From d797da3ee6071896803866fd82d5c63b097fcb0d Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 19:53:22 -0600 Subject: [PATCH 17/25] Fix problem with clang-14.0.0 and reference `gemm` ukr. Details: - clang 14.0.0 apparently makes some invalid assumptions about whether or not the AB microtile is initialized in the `gemm` reference microkernel. This leads to the "scale by alpha" part doing something strange (all sorts of random and even NaN values pop up). I do not know why this only manifested for `ztrsm` on `skx` (in `zgemm_skx_ref` via `zgemmtrsm_skx_ref`). See #852. - Aliasing the AB microtile (in the proper datatype) as a pointer to a raw character array, and then initializing the character array with `= { 0 }` convinces the compiler to do the right thing. - The problem did not occur in 14.0.6 or 15.0.7. It may only be a narrow band of versions which are problematic. - This commit adds the char array workaround and fixes #852. 
--- ref_kernels/3/bli_gemm_ref.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/ref_kernels/3/bli_gemm_ref.c b/ref_kernels/3/bli_gemm_ref.c index ab861bcb56..61505eef86 100644 --- a/ref_kernels/3/bli_gemm_ref.c +++ b/ref_kernels/3/bli_gemm_ref.c @@ -194,16 +194,15 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ return; \ } \ \ - ctype ab[ BLIS_STACK_BUF_MAX_SIZE \ - / sizeof( ctype ) ] \ - __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ab = nr; \ - const inc_t cs_ab = 1; \ -\ - const inc_t rs_a = PASTECH(BLIS_BBM_,ch); \ - const inc_t cs_a = PASTECH(BLIS_PACKMR_,ch); \ - const inc_t rs_b = PASTECH(BLIS_PACKNR_,ch); \ - const inc_t cs_b = PASTECH(BLIS_BBN_,ch); \ + char ab_[ BLIS_STACK_BUF_MAX_SIZE ] __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))) = { 0 }; \ + ctype* ab = (ctype*)ab_; \ + const inc_t rs_ab = nr; \ + const inc_t cs_ab = 1; \ +\ + const inc_t rs_a = PASTECH(BLIS_BBM_,ch); \ + const inc_t cs_a = PASTECH(BLIS_PACKMR_,ch); \ + const inc_t rs_b = PASTECH(BLIS_PACKNR_,ch); \ + const inc_t cs_b = PASTECH(BLIS_BBN_,ch); \ \ \ /* Initialize the accumulator elements in ab to zero. */ \ From e09c9fbd60fb9c958cc21a61ee42cad5fdc91067 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 20:43:13 -0600 Subject: [PATCH 18/25] Edit. --- ci/do_sde.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/do_sde.sh b/ci/do_sde.sh index c713a59a92..8730830e3b 100755 --- a/ci/do_sde.sh +++ b/ci/do_sde.sh @@ -3,7 +3,7 @@ set -e set -x -SDE_VERSION=sde-external-8.69.1-2021-07-18-lin +SDE_VERSION=sde-external-9.48.0-2024-11-25-lin SDE_TARBALL=$SDE_VERSION.tar.bz2 SDE=$SDE_VERSION/sde64 From fa28fd7308e1fdc7008ad7bf0a282836c1446076 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 21:04:51 -0600 Subject: [PATCH 19/25] Edit. 
--- ci/do_sde.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/do_sde.sh b/ci/do_sde.sh index 8730830e3b..41b5353b7c 100755 --- a/ci/do_sde.sh +++ b/ci/do_sde.sh @@ -46,6 +46,12 @@ done for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do if [ "$ARCH" = "knl" ]; then TESTSUITE_WRAPPER="$SDE -knl --" + elif [ "$ARCH" = "sandybridge" ]; then + # The sandybridge.def file causes a segfault in SDE on some systems. + # Instead, use the CPUID values for haswell, but force BLIS to use the + # sandybridge configuration. + TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/ci/cpuid/haswell.def --" + export BLIS_ARCH_TYPE=4 else TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/ci/cpuid/$ARCH.def --" fi From befa9a6d7b20108d93ce2fbc6736a7e147d4685b Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 23:21:57 -0600 Subject: [PATCH 20/25] Edit. --- ci/do_sde.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/do_sde.sh b/ci/do_sde.sh index 41b5353b7c..668cd5b573 100755 --- a/ci/do_sde.sh +++ b/ci/do_sde.sh @@ -44,6 +44,8 @@ for LIB in $LD_SO $LIBC_SO $LIBM_SO; do done for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do + export BLIS_ARCH_TYPE=-1 + if [ "$ARCH" = "knl" ]; then TESTSUITE_WRAPPER="$SDE -knl --" elif [ "$ARCH" = "sandybridge" ]; then From 21a5d0007611b5e267aa3de5403049db928cc118 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Thu, 6 Feb 2025 23:28:53 -0600 Subject: [PATCH 21/25] Revert "Edit." This reverts commit e09c9fbd60fb9c958cc21a61ee42cad5fdc91067. 
--- ci/do_sde.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/do_sde.sh b/ci/do_sde.sh index 668cd5b573..3630df4a91 100755 --- a/ci/do_sde.sh +++ b/ci/do_sde.sh @@ -3,7 +3,7 @@ set -e set -x -SDE_VERSION=sde-external-9.48.0-2024-11-25-lin +SDE_VERSION=sde-external-8.69.1-2021-07-18-lin SDE_TARBALL=$SDE_VERSION.tar.bz2 SDE=$SDE_VERSION/sde64 From 1c29e3c45cea72da8dce139f2b7f8b852134c041 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sat, 8 Feb 2025 00:49:14 -0600 Subject: [PATCH 22/25] Edit. --- ci/do_sde.sh | 25 +++++++++++++------------ ci/patch-ld-so.py | 16 ---------------- 2 files changed, 13 insertions(+), 28 deletions(-) delete mode 100755 ci/patch-ld-so.py diff --git a/ci/do_sde.sh b/ci/do_sde.sh index 3630df4a91..7f8a927f67 100755 --- a/ci/do_sde.sh +++ b/ci/do_sde.sh @@ -31,18 +31,6 @@ tar xvf $SDE_TARBALL make -j2 testsuite-bin blastest-bin -TMP=`ldd ./test_libblis.x | grep ld | sed 's/^.*=> //'` -LD_SO=${TMP%% *} -TMP=`ldd ./test_libblis.x | grep libc | sed 's/^.*=> //'` -LIBC_SO=${TMP%% *} -TMP=`ldd ./test_libblis.x | grep libm | sed 's/^.*=> //'` -LIBM_SO=${TMP%% *} -for LIB in $LD_SO $LIBC_SO $LIBM_SO; do - $DIST_PATH/ci/patch-ld-so.py $LIB .tmp - chmod a+x .tmp - sudo mv .tmp $LIB -done - for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do export BLIS_ARCH_TYPE=-1 @@ -54,6 +42,19 @@ for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator # sandybridge configuration. TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/ci/cpuid/haswell.def --" export BLIS_ARCH_TYPE=4 + elif [ "$ARCH" = "piledriver" ]; then + # We used to "patch" ld.so and libm to remove CPUID checks so that glibc + # wouldn't try to use instructions not supported by SDE (FMA4). That no + # longer works, so test Piledriver/Steamroller/Excavator as haswell + # but with the configuration forced via environment variable. 
+ TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/ci/cpuid/haswell.def --" + export BLIS_ARCH_TYPE=11 + elif [ "$ARCH" = "steamroller" ]; then + TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/ci/cpuid/haswell.def --" + export BLIS_ARCH_TYPE=10 + elif [ "$ARCH" = "excavator" ]; then + TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/ci/cpuid/haswell.def --" + export BLIS_ARCH_TYPE=9 else TESTSUITE_WRAPPER="$SDE -cpuid_in $DIST_PATH/ci/cpuid/$ARCH.def --" fi diff --git a/ci/patch-ld-so.py b/ci/patch-ld-so.py deleted file mode 100755 index 72e580d745..0000000000 --- a/ci/patch-ld-so.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python - -# -# Patch ld.so to disable runtime CPUID detection -# Taken from https://stackoverflow.com/a/44483482 -# - -import re -import sys - -infile, outfile = sys.argv[1:] -d = open(infile, 'rb').read() -# Match CPUID(eax=0), "xor eax,eax" followed closely by "cpuid" -o = re.sub(b'(\x31\xc0.{0,32})\x0f\xa2', b'\\1\x66\x90', d) -#assert d != o -open(outfile, 'wb').write(o) From dc3fea1302b12d58d940e4ca352a2645c3c658c5 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sat, 8 Feb 2025 13:35:56 -0600 Subject: [PATCH 23/25] Edit [ci skip]. 
--- .travis.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index bdfafb6b0f..df955764f8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -100,19 +100,19 @@ script: - if [ $OOT -eq 1 ]; then export DIST_PATH=`pwd`; mkdir ../oot; cd ../oot; chmod -R a-w $DIST_PATH; fi - pwd - if [ "$CONF" = "rv64iv" ]; then - $DIST_PATH/travis/do_riscv.sh "$CONF"; + $DIST_PATH/ci/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-gcc; export CXX=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-g++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 -cpu rv64,vext_spec=v1.0,v=true,vlen=128 -B 0x100000"; fi - if [ "$CONF" = "rv32iv" ]; then - $DIST_PATH/travis/do_riscv.sh "$CONF"; + $DIST_PATH/ci/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/riscv32-unknown-linux-gnu-gcc; export CXX=$DIST_PATH/../toolchain/riscv/bin/riscv32-unknown-linux-gnu-g++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv32 -cpu rv32,vext_spec=v1.0,v=true,vlen=128 -B 0x100000"; fi - if [ "$CONF" = "sifive_x280" ]; then - $DIST_PATH/travis/do_riscv.sh "$CONF"; + $DIST_PATH/ci/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/clang; export CXX=$DIST_PATH/../toolchain/riscv/bin/clang++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 -cpu rv64,vext_spec=v1.0,v=true,vlen=512 -B 0x100000"; @@ -124,9 +124,9 @@ script: - $CC -v - make -j 2 - make install -- if [ "$BLD" = "" ]; then $DIST_PATH/travis/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi +- if [ "$BLD" = "" ]; then $DIST_PATH/ci/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi # Qemu SVE is failing sgemmt in some cases. Skip as this issue is not observed # on real chip (A64fx). 
- if [ "$CONF" = "armsve" ]; then sed -i 's/.*\<gemmt\>.*/0/' $DIST_PATH/testsuite/input.operations.fast; fi -- if [ "$TEST" != "0" ]; then travis_wait 30 $DIST_PATH/travis/do_testsuite.sh; fi -- if [ "$SDE" = "1" ]; then travis_wait 30 $DIST_PATH/travis/do_sde.sh; fi +- if [ "$TEST" != "0" ]; then travis_wait 30 $DIST_PATH/ci/do_testsuite.sh; fi +- if [ "$SDE" = "1" ]; then travis_wait 30 $DIST_PATH/ci/do_sde.sh; fi From 1bccce868a907e7452fbc1cb327d2701aa481eb5 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sat, 8 Feb 2025 13:41:43 -0600 Subject: [PATCH 24/25] Edit [ci skip]. --- .circleci/config.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bf0ff226c9..eb3f385a35 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,6 @@ branches: - master - dev - amd - - circleci executors: linux: # Docker using the Base Convenience Image From 57747b53d67f62ed34c9207f826a3bc1c2fd1ab4 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sat, 8 Feb 2025 13:43:13 -0600 Subject: [PATCH 25/25] Edit [ci skip]. --- .circleci/config.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index eb3f385a35..339fc11cf0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -25,18 +25,17 @@ workflows: jobs: # Default: # - build: - # parameters: - # os: linux - # CC: gcc - # OOT: 0 - # TEST: FAST - # SDE: 0 - # THR: none - # CONF: auto - # BLD: '' - # LDFLAGS: '' - # TESTSUITE_WRAPPER: '' - # PACKAGES: '' + # os: linux + # CC: gcc + # OOT: 0 + # TEST: FAST + # SDE: 0 + # THR: none + # CONF: auto + # BLD: '' + # LDFLAGS: '' + # TESTSUITE_WRAPPER: '' + # PACKAGES: '' # full testsuite (all tests + mixed datatype (gemm_nn only) + salt + OOT) - build: @@ -47,6 +46,7 @@ workflows: # SDE testing for x86_64 - build: + # linuxvm must be used because it provides 8G RAM and SDE fails with 4G RAM os: linuxvm OOT: 0 TEST: FAST