Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
fad61c3
Add new level-0 macro layer.
devinamatthews Nov 3, 2024
f35f2c2
Fix problem with GEMMTRSM edge-case macros (affects haswell predomina…
devinamatthews Nov 3, 2024
08c1d99
Use temporaries in level-0 axpys and scal2s.
devinamatthews Nov 3, 2024
4b6cad0
Fix some places where old level-0 macros were still used.
devinamatthews Nov 4, 2024
6970f23
Make it easier to override CXXLANGFLAGS.
devinamatthews Nov 4, 2024
f527dcc
Fix some bugs in the new level-0 macros.
devinamatthews Nov 4, 2024
662c162
Add testsuite for new level-0 macros (C++ based).
devinamatthews Nov 4, 2024
9c00ded
Fix remaining errors in level-0 macros and tests.
devinamatthews Nov 4, 2024
15d36b1
Work around preprocessor bug in gcc 11 and older.
devinamatthews Nov 4, 2024
97da69c
Update Multithreading.md
devinamatthews Nov 15, 2024
7eb841c
Comment update.
fgvanzee Nov 25, 2024
ab61724
Undo changes to optimized kernels and vendor code. A compatibility la…
devinamatthews Jan 22, 2025
f1534a7
Add compatibility layer for old code such as optimized kernels.
devinamatthews Jan 22, 2025
ddb9068
Fix some casting problems in level-0 macros.
devinamatthews Feb 26, 2025
9d1d368
Check const-correctness in level-0 macros.
devinamatthews Feb 26, 2025
d757d41
Merge branch 'master' into new-level0
devinamatthews Feb 26, 2025
a686d1b
Add level-0 macro testing to CI.
devinamatthews Feb 26, 2025
106f585
Error out on missing function declaration. [ci skip] for now.
devinamatthews Feb 26, 2025
7f6d81c
Add more legacy macros needed by x86 kernels.
devinamatthews Feb 27, 2025
78bf23a
One last legacy macro, and disable check for CI correctness.
devinamatthews Feb 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
13 changes: 9 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ workflows:
# OOT: 0
# TEST: FAST
# SDE: 0
# LEVEL0: 0
# THR: none
# CONF: auto
# BLD: ''
Expand All @@ -41,20 +42,18 @@ workflows:
- build:
OOT: 1
TEST: ALL
SDE: 0
CONF: x86_64

# SDE testing for x86_64
- build:
# linuxvm must be used because it provides 8G RAM and SDE fails with 4G RAM
os: linuxvm
OOT: 0
TEST: FAST
SDE: 1
CONF: x86_64

# openmp build
# openmp build + LEVEL0
- build:
LEVEL0: 1
THR: openmp

# pthreads build
Expand Down Expand Up @@ -153,6 +152,9 @@ jobs:
SDE:
type: integer
default: 0
LEVEL0:
type: integer
default: 0
THR:
type: string
default: none
Expand Down Expand Up @@ -197,6 +199,7 @@ jobs:
export BLD="<< parameters.BLD >>"
export LDFLAGS="<< parameters.LDFLAGS >>"
export SDE="<< parameters.SDE >>"
export LEVEL0="<< parameters.LEVEL0 >>"
export THR="<< parameters.THR >>"
export TESTSUITE_WRAPPER="<< parameters.TESTSUITE_WRAPPER >>"

Expand Down Expand Up @@ -232,6 +235,7 @@ jobs:
echo "TEST = $TEST"
echo "BLD = $BLD"
echo "SDE = $SDE"
echo "LEVEL0 = $LEVEL0"
echo "DIST_PATH = $DIST_PATH"
echo "LDFLAGS = $LDFLAGS"
echo "TESTSUITE_WRAPPER = $TESTSUITE_WRAPPER"
Expand All @@ -251,3 +255,4 @@ jobs:
if [ "$CONF" = "armsve" ]; then sed -i 's/.*\<gemmt\>.*/0/' $DIST_PATH/testsuite/input.operations.fast; fi
if [ "$TEST" != "0" ]; then $DIST_PATH/ci/do_testsuite.sh; fi
if [ "$SDE" = "1" ]; then $DIST_PATH/ci/do_sde.sh; fi
if [ "$LEVEL0" = "1" ]; then $DIST_PATH/ci/do_level0.sh; fi
2 changes: 1 addition & 1 deletion blastest/src/dblat1.c
Original file line number Diff line number Diff line change
Expand Up @@ -1044,7 +1044,7 @@ static real c_b81 = 0.f;

/* Local variables */
real sd;
extern real s_epsilon_();
extern real s_epsilon_(real *);

/* Fortran I/O blocks */
static cilist io___125 = { 0, 6, 0, fmt_99999, 0 };
Expand Down
20 changes: 10 additions & 10 deletions build/libblis-symbols.def
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ bli_cger_ex
bli_cgetijm
bli_cgetijv
bli_cgetsc
bli_cgtesc
bli_cgesc
bli_cgtsc
bli_check_error_code_helper
bli_chemm
Expand Down Expand Up @@ -149,7 +149,7 @@ bli_cinvscalv
bli_cinvscalv_ex
bli_clock
bli_clock_min_diff
bli_cltesc
bli_clesc
bli_cltsc
bli_cmachval
bli_cmkherm
Expand Down Expand Up @@ -351,7 +351,7 @@ bli_dger_ex
bli_dgetijm
bli_dgetijv
bli_dgetsc
bli_dgtesc
bli_dgesc
bli_dgtsc
bli_dhemm
bli_dhemm_ex
Expand All @@ -377,7 +377,7 @@ bli_dinvscalm_ex
bli_dinvscalv
bli_dinvscalv_ex
bli_divsc
bli_dltesc
bli_dlesc
bli_dltsc
bli_dmachval
bli_dmkherm
Expand Down Expand Up @@ -532,7 +532,7 @@ bli_gks_l3_ukr_impl_type
bli_gks_query_cntx
bli_gks_query_ind_cntx
bli_gks_query_nat_cntx
bli_gtesc
bli_gesc
bli_gtsc
bli_hemm
bli_hemm_ex
Expand Down Expand Up @@ -630,7 +630,7 @@ bli_iprintm
bli_iprintv
bli_isetsc
bli_l3_thrinfo_create
bli_ltesc
bli_lesc
bli_ltsc
bli_machval
bli_malloc_user
Expand Down Expand Up @@ -824,7 +824,7 @@ bli_sger_ex
bli_sgetijm
bli_sgetijv
bli_sgetsc
bli_sgtesc
bli_sgesc
bli_sgtsc
bli_shemm
bli_shemm_ex
Expand Down Expand Up @@ -852,7 +852,7 @@ bli_sinvscalm_ex
bli_sinvscalv
bli_sinvscalv_ex
bli_sleep
bli_sltesc
bli_slesc
bli_sltsc
bli_smachval
bli_smkherm
Expand Down Expand Up @@ -1097,7 +1097,7 @@ bli_zger_ex
bli_zgetijm
bli_zgetijv
bli_zgetsc
bli_zgtesc
bli_zgesc
bli_zgtsc
bli_zhemm
bli_zhemm_ex
Expand All @@ -1123,7 +1123,7 @@ bli_zinvscalm_ex
bli_zinvscalv
bli_zinvscalv_ex
bli_zipsc
bli_zltesc
bli_zlesc
bli_zltsc
bli_zmachval
bli_zmkherm
Expand Down
2 changes: 1 addition & 1 deletion build/plugin/my_kernel_1_ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ void PASTEMAC(ch,opname,arch,suf) \
\
for ( dim_t i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,copys)( *a, x[ i ] ); \
bli_tcopys( ch,ch, *a, x[ i ] ); \
} \
}

Expand Down
4 changes: 2 additions & 2 deletions build/plugin/my_kernel_2_ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void PASTEMAC(ch,opname,arch,suf) \
{ \
for ( dim_t i = 0; i < m; ++i ) \
{ \
PASTEMAC(ch,seti0s)( a[ i*n + j ] ); \
bli_tseti0s( ch, a[ i*n + j ] ); \
} \
} \
} \
Expand All @@ -66,7 +66,7 @@ void PASTEMAC(ch,opname,arch,suf) \
{ \
for ( dim_t j = 0; j < n; ++j ) \
{ \
PASTEMAC(ch,seti0s)( a[ i + j*m ] ); \
bli_tseti0s( ch, a[ i + j*m ] ); \
} \
} \
} \
Expand Down
16 changes: 16 additions & 0 deletions ci/do_level0.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Build and run the test program in test/level0 (./test_l0.x).
#
# Expected to be run from the top of the build tree. If test/level0 does not
# exist there (e.g. an out-of-tree build), it is created and populated with
# symlinks to the files under $SRCDIR/test/level0.
#
# Environment:
#   SRCDIR  Path to the source tree; defaults to ../.. when unset or empty.

# Abort on the first failing command and echo every command for CI logs.
set -e
set -x

: "${SRCDIR:=../..}"

if ! [ -d test/level0 ]; then
    # A relative symlink target is resolved against the directory that holds
    # the link, not against the current directory. Links created inside
    # test/level0 from a relative SRCDIR would therefore dangle or point at
    # themselves, so resolve SRCDIR to an absolute path first.
    srcdir_abs=$(cd "$SRCDIR" && pwd)

    mkdir -p test/level0
    ln -s "$srcdir_abs"/test/level0/* test/level0/
fi

cd test/level0
make -j2

./test_l0.x
9 changes: 6 additions & 3 deletions common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ $(eval $(call store-var-for,CC, $(1)))
$(eval $(call store-var-for,CC_VENDOR, $(1)))
$(eval $(call store-var-for,CPPROCFLAGS,$(1)))
$(eval $(call store-var-for,CLANGFLAGS, $(1)))
$(eval $(call store-var-for,CXXLANGFLAGS,$(1)))
$(eval $(call store-var-for,CMISCFLAGS, $(1)))
$(eval $(call store-var-for,CPICFLAGS, $(1)))
$(eval $(call store-var-for,CWARNFLAGS, $(1)))
Expand Down Expand Up @@ -109,8 +108,8 @@ get-noopt-cxxflags-for = $(strip $(CXXFLAGS_PRESET) \
$(call load-var-for,CWARNFLAGS,$(1)) \
$(call load-var-for,CPICFLAGS,$(1)) \
$(call load-var-for,CMISCFLAGS,$(1)) \
$(call load-var-for,CXXLANGFLAGS,$(1)) \
$(call load-var-for,CPPROCFLAGS,$(1)) \
$(CXXLANGFLAGS) \
$(CTHREADFLAGS) \
$(CXXTHREADFLAGS) \
$(CINCFLAGS) \
Expand Down Expand Up @@ -776,6 +775,11 @@ endif

CWARNFLAGS :=

# Do not allow calls to functions that have only an implicit declaration
ifneq ($(CC_VENDOR),ibm)
CWARNFLAGS += -Werror=implicit-function-declaration
endif

# Disable unused function warnings and stop compiling on first error for
# all compilers that accept such options: gcc, clang, and icc.
ifneq ($(CC_VENDOR),ibm)
Expand Down Expand Up @@ -914,7 +918,6 @@ endif
else
CXXLANGFLAGS :=
endif
$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CXXLANGFLAGS,$(c))))

# --- C Preprocessor flags ---

Expand Down
14 changes: 7 additions & 7 deletions config/template/kernels/1/bli_axpyv_template_noopt_var1.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ void bli_zaxpyv_template_noopt

if ( bli_zero_dim1( n ) ) return;

if ( bli_zeq0( *alpha ) ) return;
if ( bli_teq0s( z, *alpha ) ) return;


// If there is anything that would interfere with our use of aligned
Expand Down Expand Up @@ -179,7 +179,7 @@ void bli_zaxpyv_template_noopt
// Compute front edge cases if x and y were unaligned.
for ( i = 0; i < n_pre; ++i )
{
bli_zaxpys( *alpha, *xp, *yp );
bli_taxpys( z,z,z,z, *alpha, *xp, *yp );

xp += 1; yp += 1;
}
Expand All @@ -188,7 +188,7 @@ void bli_zaxpyv_template_noopt
// yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE.
for ( i = 0; i < n_iter; ++i )
{
bli_zaxpys( *alpha, *xp, *yp );
bli_taxpys( z,z,z,z, *alpha, *xp, *yp );

xp += n_elem_per_iter;
yp += n_elem_per_iter;
Expand All @@ -197,7 +197,7 @@ void bli_zaxpyv_template_noopt
// Compute tail edge cases, if applicable.
for ( i = 0; i < n_left; ++i )
{
bli_zaxpys( *alpha, *xp, *yp );
bli_taxpys( z,z,z,z, *alpha, *xp, *yp );

xp += 1; yp += 1;
}
Expand All @@ -207,7 +207,7 @@ void bli_zaxpyv_template_noopt
// Compute front edge cases if x and y were unaligned.
for ( i = 0; i < n_pre; ++i )
{
bli_zaxpyjs( *alpha, *xp, *yp );
bli_taxpyjs( z,z,z,z, *alpha, *xp, *yp );

xp += 1; yp += 1;
}
Expand All @@ -216,7 +216,7 @@ void bli_zaxpyv_template_noopt
// yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE.
for ( i = 0; i < n_iter; ++i )
{
bli_zaxpyjs( *alpha, *xp, *yp );
bli_taxpyjs( z,z,z,z, *alpha, *xp, *yp );

xp += n_elem_per_iter;
yp += n_elem_per_iter;
Expand All @@ -225,7 +225,7 @@ void bli_zaxpyv_template_noopt
// Compute tail edge cases, if applicable.
for ( i = 0; i < n_left; ++i )
{
bli_zaxpyjs( *alpha, *xp, *yp );
bli_taxpyjs( z,z,z,z, *alpha, *xp, *yp );

xp += 1; yp += 1;
}
Expand Down
20 changes: 10 additions & 10 deletions config/template/kernels/1/bli_dotv_template_noopt_var1.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ void bli_zdotv_template_noopt
// If the vector lengths are zero, set rho to zero and return.
if ( bli_zero_dim1( n ) )
{
bli_zset0s( *rho );
bli_tset0s( z, *rho );
return;
}

Expand Down Expand Up @@ -185,7 +185,7 @@ void bli_zdotv_template_noopt


// Initialize accumulator to zero.
bli_zset0s( dotxy );
bli_tset0s( z, dotxy );


conjx_use = conjx;
Expand All @@ -204,7 +204,7 @@ void bli_zdotv_template_noopt
// Compute front edge cases if x and y were unaligned.
for ( i = 0; i < n_pre; ++i )
{
bli_zdots( *xp, *yp, dotxy );
bli_tdots( z,z,z,z, *xp, *yp, dotxy );

xp += 1; yp += 1;
}
Expand All @@ -213,7 +213,7 @@ void bli_zdotv_template_noopt
// yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE.
for ( i = 0; i < n_iter; ++i )
{
bli_zdots( *xp, *yp, dotxy );
bli_tdots( z,z,z,z, *xp, *yp, dotxy );

xp += n_elem_per_iter;
yp += n_elem_per_iter;
Expand All @@ -222,7 +222,7 @@ void bli_zdotv_template_noopt
// Compute tail edge cases, if applicable.
for ( i = 0; i < n_left; ++i )
{
bli_zdots( *xp, *yp, dotxy );
bli_tdots( z,z,z,z, *xp, *yp, dotxy );

xp += 1; yp += 1;
}
Expand All @@ -232,7 +232,7 @@ void bli_zdotv_template_noopt
// Compute front edge cases if x and y were unaligned.
for ( i = 0; i < n_pre; ++i )
{
bli_zdotjs( *xp, *yp, dotxy );
bli_tdotjs( z,z,z,z, *xp, *yp, dotxy );

xp += 1; yp += 1;
}
Expand All @@ -241,7 +241,7 @@ void bli_zdotv_template_noopt
// yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE.
for ( i = 0; i < n_iter; ++i )
{
bli_zdotjs( *xp, *yp, dotxy );
bli_tdotjs( z,z,z,z, *xp, *yp, dotxy );

xp += n_elem_per_iter;
yp += n_elem_per_iter;
Expand All @@ -250,7 +250,7 @@ void bli_zdotv_template_noopt
// Compute tail edge cases, if applicable.
for ( i = 0; i < n_left; ++i )
{
bli_zdotjs( *xp, *yp, dotxy );
bli_tdotjs( z,z,z,z, *xp, *yp, dotxy );

xp += 1; yp += 1;
}
Expand All @@ -259,8 +259,8 @@ void bli_zdotv_template_noopt
// If conjugation on y was requested, we induce it by conjugating
// the contents of dotxy.
if ( bli_is_conj( conjy ) )
bli_zconjs( dotxy );
bli_tconjs( z, dotxy );

bli_zcopys( dotxy, *rho );
bli_tcopys( z,z, dotxy, *rho );
}

Loading