From 4648c3661fa68c864eb53c0594b1e9f38d1de224 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 26 Oct 2022 18:10:26 -0500 Subject: [PATCH 1/2] Fix auto-detection of firestorm (Apple M1). --- frame/base/bli_cpuid.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/frame/base/bli_cpuid.c b/frame/base/bli_cpuid.c index 527db1f5d7..d967cc05d6 100644 --- a/frame/base/bli_cpuid.c +++ b/frame/base/bli_cpuid.c @@ -781,7 +781,7 @@ uint32_t bli_cpuid_query if ( bli_cpuid_has_features( ecx, FEATURE_MASK_AVX ) ) *features |= FEATURE_AVX; if ( bli_cpuid_has_features( ecx, FEATURE_MASK_FMA3 ) ) *features |= FEATURE_FMA3; - // Check whether the hardware supports xsave/xrestor/xsetbv/xgetbv AND + // Check whether the hardware supports xsave/xrestor/xsetbv/xgetbv AND // support for these is enabled by the OS. If so, then we proceed with // checking that various register-state saving features are available. if ( bli_cpuid_has_features( ecx, FEATURE_MASK_XGETBV ) ) @@ -813,7 +813,7 @@ uint32_t bli_cpuid_query // The OS can manage the state of 512-bit zmm (AVX-512) registers // only if the xcr[7:5] bits are set. If they are not set, then - // clear all feature bits related to AVX-512. + // clear all feature bits related to AVX-512. if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM | XGETBV_MASK_YMM | XGETBV_MASK_ZMM ) ) @@ -829,7 +829,7 @@ uint32_t bli_cpuid_query // The OS can manage the state of 256-bit ymm (AVX) registers // only if the xcr[2] bit is set. If it is not set, then - // clear all feature bits related to AVX. + // clear all feature bits related to AVX. if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM | XGETBV_MASK_YMM ) ) { @@ -842,7 +842,7 @@ uint32_t bli_cpuid_query // The OS can manage the state of 128-bit xmm (SSE) registers // only if the xcr[1] bit is set. If it is not set, then // clear all feature bits related to SSE (which means the - // entire bitfield is clear). + // entire bitfield is clear). if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM ) ) { *features = 0; @@ -1025,6 +1025,7 @@ static uint32_t get_coretype { int implementer = 0x00, part = 0x000; *features = FEATURE_NEON; + bool has_sve = FALSE; #ifdef __linux__ if ( getauxval( AT_HWCAP ) & HWCAP_CPUID ) @@ -1033,7 +1034,7 @@ static uint32_t get_coretype // /sys/devices/system/cpu/cpu0/regs/identification/midr_el1 // and split out in /proc/cpuinfo (with a tab before the colon): // CPU part : 0x0a1 - + uint64_t midr_el1; __asm("mrs %0, MIDR_EL1" : "=r" (midr_el1)); /* @@ -1047,8 +1048,8 @@ static uint32_t get_coretype implementer = (midr_el1 >> 24) & 0xFF; part = (midr_el1 >> 4) & 0xFFF; } - - bool has_sve = getauxval( AT_HWCAP ) & HWCAP_SVE; + + has_sve = getauxval( AT_HWCAP ) & HWCAP_SVE; if (has_sve) *features |= FEATURE_SVE; #endif //__linux__ @@ -1097,7 +1098,7 @@ static uint32_t get_coretype // CAVIUM_CPU_PART_THUNDERX2 0x0AF // CAVIUM_CPU_PART_THUNDERX3 0x0B8 // taken from OpenBLAS // - // BRCM_CPU_PART_BRAHMA_B53 0x100 + // BRCM_CPU_PART_BRAHMA_B53 0x100 // BRCM_CPU_PART_VULCAN 0x516 // // QCOM_CPU_PART_FALKOR_V1 0x800 @@ -1210,7 +1211,7 @@ uint32_t bli_cpuid_query #elif defined(__arm__) || defined(_M_ARM) || defined(_ARCH_PPC) -/* +/* I can't easily find documentation to do this as for aarch64, though it presumably could be unearthed from Linux code. However, on Linux 5.2 (and Androids's 3.4), /proc/cpuinfo has this sort of From d503703a728c8f77401d68054185229a24d814e8 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 26 Oct 2022 18:20:00 -0500 Subject: [PATCH 2/2] Add check to disable armsve on Apple M1. --- configure | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/configure b/configure index a53f25380e..37399fbde2 100755 --- a/configure +++ b/configure @@ -1335,6 +1335,17 @@ blacklistbu_add() fi } +blacklistos_add() +{ + # Check whether we've already blacklisted the given sub-config so + # we don't output redundant messages. + if [ $(is_in_list "$1" "${config_blist}") == "false" ]; then + + echowarn "The operating system does not support building '$1'; adding to blacklist." + config_blist="${config_blist} $1" + fi +} + blacklist_init() { config_blist="" @@ -1989,6 +2000,13 @@ check_assembler() fi } +check_os() +{ + if [[ "$(uname -s)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then + blacklistos_add "armsve" + fi +} + try_assemble() { local cc cflags asm_src asm_base asm_bin rval @@ -2886,6 +2904,9 @@ main() get_binutils_version check_assembler + # Check if there is any incompatibility due to the operating system. + check_os + # Remove duplicates and whitespace from the blacklist. blacklist_cleanup