From fad61c37e70d9d8489fdd28d6966467f7b02b7fd Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sat, 2 Nov 2024 21:18:49 -0500 Subject: [PATCH 01/19] Add new level-0 macro layer. Details: - Developed by @fgvanzee and @devinamatthews. - Level-0 scalar macros have moved from a name-based system (e.g. `bli_dcopys( ... )`) to a macro argument-based system (`bli_tcopys( d,d, ... )`). - All macros are explicitly mixed-type. - All input and output operands can have a distinct type (precision and/or domain). Unnecessary computations and spurious NaN/Inf propagation are avoided in mixed-domain cases. - All macros which do math (i.e. not copy/set/etc.) take an additional computational precision. - Tile-level macros, 1m, broadcast-B, and other extensions are also included. - All macros should correctly handle aliasing of input and output operands (this needs to be rigorously checked). - The macros work generically over the defined types -- new types only need limited support (primarily conversion to other types and basic math). 
--- blastest/src/dblat1.c | 2 +- build/libblis-symbols.def | 20 +- build/plugin/my_kernel_1_ref.c | 2 +- build/plugin/my_kernel_2_ref.c | 4 +- .../kernels/1/bli_axpyv_template_noopt_var1.c | 2 +- .../kernels/1/bli_dotv_template_noopt_var1.c | 2 +- .../1f/bli_axpyf_template_noopt_var1.c | 4 +- .../1f/bli_dotaxpyv_template_noopt_var1.c | 2 +- .../1f/bli_dotxaxpyf_template_noopt_var1.c | 4 +- .../kernels/3/bli_gemm_template_noopt_mxn.c | 14 +- .../kernels/3/bli_trsm_l_template_noopt_mxn.c | 2 +- .../kernels/3/bli_trsm_u_template_noopt_mxn.c | 2 +- docs/Multithreading.md | 2 +- frame/0/bli_l0_tapi.c | 40 +- frame/0/copysc/bli_copysc.c | 4 +- frame/1d/bli_l1d_tapi.c | 2 +- frame/1m/bli_l1m_tapi.c | 8 +- frame/1m/bli_l1m_unb_var1.c | 12 +- frame/1m/packm/bli_packm_struc_cxk.c | 4 +- frame/2/bli_l2_tapi.c | 16 +- frame/2/gemv/amd/bli_gemv_unf_var2_amd.c | 4 +- frame/2/gemv/bli_gemv_unb_var2.c | 6 +- frame/2/gemv/bli_gemv_unf_var2.c | 2 +- frame/2/ger/bli_ger_unb_var1.c | 4 +- frame/2/ger/bli_ger_unb_var2.c | 4 +- frame/2/hemv/bli_hemv_unb_var1.c | 12 +- frame/2/hemv/bli_hemv_unb_var2.c | 12 +- frame/2/hemv/bli_hemv_unb_var3.c | 12 +- frame/2/hemv/bli_hemv_unb_var4.c | 12 +- frame/2/hemv/bli_hemv_unf_var1.c | 20 +- frame/2/hemv/bli_hemv_unf_var1a.c | 14 +- frame/2/hemv/bli_hemv_unf_var3.c | 20 +- frame/2/hemv/bli_hemv_unf_var3a.c | 14 +- frame/2/her/bli_her_unb_var1.c | 16 +- frame/2/her/bli_her_unb_var2.c | 16 +- frame/2/her2/bli_her2_unb_var1.c | 26 +- frame/2/her2/bli_her2_unb_var2.c | 26 +- frame/2/her2/bli_her2_unb_var3.c | 26 +- frame/2/her2/bli_her2_unb_var4.c | 26 +- frame/2/her2/bli_her2_unf_var1.c | 26 +- frame/2/her2/bli_her2_unf_var4.c | 26 +- frame/2/trmv/bli_trmv_unb_var1.c | 16 +- frame/2/trmv/bli_trmv_unb_var2.c | 16 +- frame/2/trmv/bli_trmv_unf_var1.c | 28 +- frame/2/trmv/bli_trmv_unf_var2.c | 24 +- frame/2/trsv/bli_trsv_unb_var1.c | 12 +- frame/2/trsv/bli_trsv_unb_var2.c | 12 +- frame/2/trsv/bli_trsv_unf_var1.c | 24 +- frame/2/trsv/bli_trsv_unf_var2.c | 
20 +- frame/3/bli_l3_sup_var12.c | 4 +- frame/3/gemmt/bli_gemmt_l_ker_var2.c | 6 +- frame/3/gemmt/bli_gemmt_l_ker_var2b.c | 6 +- frame/3/gemmt/bli_gemmt_u_ker_var2.c | 6 +- frame/3/gemmt/bli_gemmt_u_ker_var2b.c | 6 +- frame/base/bli_machval.c | 2 +- frame/base/bli_obj.c | 8 +- frame/base/bli_query.c | 4 +- frame/base/bli_setgetijm.c | 4 +- frame/base/bli_setgetijv.c | 4 +- frame/base/cast/bli_castm.c | 8 +- frame/base/cast/bli_castnzm.c | 8 +- frame/base/cast/bli_castv.c | 8 +- frame/compat/amd/bla_gemv_amd.c | 2 +- frame/compat/bla_dot.c | 4 +- frame/compat/bla_her2k.c | 8 +- frame/compat/bla_herk.c | 8 +- frame/compat/bla_scal.c | 4 +- frame/compat/f2c/bla_gbmv.c | 144 +- frame/compat/f2c/bla_hbmv.c | 256 +- frame/compat/f2c/bla_hpmv.c | 256 +- frame/compat/f2c/bla_hpr.c | 128 +- frame/compat/f2c/bla_hpr2.c | 224 +- frame/compat/f2c/bla_rot.c | 180 +- frame/compat/f2c/bla_rotg.c | 44 +- frame/compat/f2c/bla_tbmv.c | 288 +- frame/compat/f2c/bla_tbsv.c | 240 +- frame/compat/f2c/bla_tpmv.c | 288 +- frame/compat/f2c/bla_tpsv.c | 240 +- frame/compat/f2c/util/bla_c_div.c | 4 +- frame/compat/f2c/util/bla_d_cnjg.c | 2 +- frame/compat/f2c/util/bla_r_cnjg.c | 2 +- frame/compat/f2c/util/bla_z_div.c | 4 +- frame/include/bli_cast_macro_defs.h | 529 +++ frame/include/bli_complex_macro_defs.h | 2 + frame/include/bli_edge_case_macro_defs.h | 5 +- frame/include/bli_genarray_macro_defs.h | 10 + frame/include/bli_gentfunc_macro_defs.h | 12 +- frame/include/bli_gentprot_macro_defs.h | 9 + frame/include/bli_macro_defs.h | 1 + frame/include/bli_misc_macro_defs.h | 37 +- frame/include/bli_scalar_macro_defs.h | 234 +- frame/include/level0/1e/bli_copy1es.h | 85 - frame/include/level0/1e/bli_copyj1es.h | 85 - frame/include/level0/1e/bli_invert1es.h | 53 - frame/include/level0/1e/bli_scal1es.h | 53 - frame/include/level0/1e/bli_scal21es.h | 235 -- frame/include/level0/1e/bli_scal2j1es.h | 235 -- .../level0/1m/bli_invert1ms_mxn_diag.h | 126 - frame/include/level0/1m/bli_scal1ms_mxn.h | 124 
- frame/include/level0/1m/bli_scal21ms_mxn.h | 202 -- .../include/level0/1m/bli_scal21ms_mxn_diag.h | 126 - .../include/level0/1m/bli_scal21ms_mxn_uplo.h | 296 -- frame/include/level0/1m/bli_set1ms_mxn.h | 194 - frame/include/level0/1m/bli_set1ms_mxn_diag.h | 130 - frame/include/level0/1m/bli_set1ms_mxn_uplo.h | 198 -- .../include/level0/1m/bli_seti01ms_mxn_diag.h | 114 - frame/include/level0/1r/bli_copy1rs.h | 51 - frame/include/level0/1r/bli_copyj1rs.h | 51 - frame/include/level0/1r/bli_invert1rs.h | 43 - frame/include/level0/1r/bli_scal1rs.h | 61 - frame/include/level0/bb/bli_bcastbbs_mxn.h | 74 - frame/include/level0/bb/bli_scal2bbs_mxn.h | 204 -- frame/include/level0/bb/bli_set0bbs_mxn.h | 74 - frame/include/level0/bli_absq2s.h | 90 - frame/include/level0/bli_abval2s.h | 97 - frame/include/level0/bli_add3s.h | 192 - frame/include/level0/bli_addjs.h | 88 - frame/include/level0/bli_adds.h | 88 - frame/include/level0/bli_adds_mxn.h | 513 --- frame/include/level0/bli_adds_mxn_uplo.h | 212 -- .../{old/ri3/bli_copyri3s.h => bli_assigns.h} | 35 +- frame/include/level0/bli_axmys.h | 192 - frame/include/level0/bli_axpbyjs.h | 480 --- frame/include/level0/bli_axpbys.h | 480 --- frame/include/level0/bli_axpbys_mxn.h | 129 - frame/include/level0/bli_axpyjs.h | 192 - frame/include/level0/bli_axpys.h | 192 - .../level0/{bli_lt.h => bli_complex_terms.h} | 67 +- frame/include/level0/bli_conjs.h | 57 - frame/include/level0/bli_copycjs.h | 92 - frame/include/level0/bli_copyjnzs.h | 80 - frame/include/level0/bli_copyjs.h | 92 - frame/include/level0/bli_copynzs.h | 78 - frame/include/level0/bli_copys.h | 78 - frame/include/level0/bli_copys_mxn.h | 676 ---- .../level0/{bli_inverts.h => bli_declinits.h} | 36 +- frame/include/level0/bli_dotjs.h | 141 - frame/include/level0/bli_dots.h | 141 - frame/include/level0/bli_eq.h | 119 - frame/include/level0/bli_gets.h | 83 - frame/include/level0/bli_invscaljs.h | 88 - frame/include/level0/bli_invscals.h | 88 - frame/include/level0/bli_lte.h 
| 71 - frame/include/level0/bli_neg2s.h | 88 - frame/include/level0/bli_randnp2s.h | 175 - frame/include/level0/bli_rands.h | 74 - frame/include/level0/bli_scal2js.h | 192 - frame/include/level0/bli_scal2s.h | 191 - frame/include/level0/bli_scal2s_mxn.h | 122 - frame/include/level0/bli_scalcjs.h | 88 - frame/include/level0/bli_scaljs.h | 88 - frame/include/level0/bli_scals.h | 88 - frame/include/level0/bli_set0s.h | 44 - frame/include/level0/bli_set0s_edge.h | 79 - frame/include/level0/bli_set0s_mxn.h | 76 - frame/include/level0/bli_set1s.h | 44 - frame/include/level0/bli_seti0s.h | 44 - frame/include/level0/bli_setis.h | 76 - frame/include/level0/bli_sets.h | 99 - frame/include/level0/bli_sqrt2s.h | 97 - frame/include/level0/bli_subjs.h | 88 - frame/include/level0/bli_subs.h | 88 - frame/include/level0/bli_swaps.h | 171 - frame/include/level0/bli_tabsq2s.h | 135 + frame/include/level0/bli_tabval2s.h | 206 ++ frame/include/level0/bli_tadd3s.h | 192 + frame/include/level0/bli_tadds.h | 176 + frame/include/level0/bli_taxpbys.h | 264 ++ frame/include/level0/bli_taxpys.h | 224 ++ .../{1r/bli_scal21rs.h => bli_tconjs.h} | 61 +- frame/include/level0/bli_tcopycjs.h | 122 + frame/include/level0/bli_tcopynzs.h | 191 + frame/include/level0/bli_tcopys.h | 246 ++ frame/include/level0/bli_tdots.h | 109 + frame/include/level0/bli_teqs.h | 154 + .../level0/{bli_fprints.h => bli_tfprints.h} | 89 +- .../level0/{bli_setrs.h => bli_tgets.h} | 103 +- frame/include/level0/bli_tinverts.h | 219 ++ frame/include/level0/bli_tinvscals.h | 264 ++ .../{old/ri3/bli_scal2ri3s.h => bli_tneg2s.h} | 88 +- .../{1r/bli_scal2j1rs.h => bli_trandnp2s.h} | 61 +- .../ri3/bli_scal2jri3s.h => bli_trands.h} | 75 +- frame/include/level0/bli_tscal2s.h | 565 +++ frame/include/level0/bli_tscalcjs.h | 129 + frame/include/level0/bli_tscals.h | 272 ++ frame/include/level0/bli_tsets.h | 294 ++ frame/include/level0/bli_tsqrt2s.h | 195 + frame/include/level0/bli_tsubs.h | 161 + frame/include/level0/bli_tswaps.h | 146 
+ frame/include/level0/bli_txpbys.h | 325 ++ frame/include/level0/bli_xpbyjs.h | 191 - frame/include/level0/bli_xpbys.h | 191 - frame/include/level0/bli_xpbys_mxn.h | 830 ----- frame/include/level0/bli_xpbys_mxn_uplo.h | 300 -- frame/include/level0/old/bli_cast.h | 150 - frame/include/level0/old/bli_castfrom.h | 33 - frame/include/level0/old/bli_castto.h | 33 - frame/include/level0/old/bli_copynzjs.h | 140 - frame/include/level0/old/bli_copynzs.h | 139 - frame/include/level0/old/bli_invscalcjs.h | 159 - frame/include/level0/old/bli_scalcjs.h | 156 - frame/include/level0/old/bli_set0ris_mxn.h | 81 - frame/include/level0/old/io/bli_scal2ios.h | 61 - frame/include/level0/old/io/bli_scal2jios.h | 52 - frame/include/level0/old/ri3/bli_copyjri3s.h | 46 - .../level0/old/ri3/bli_scal2ri3s_mxn.h | 183 - .../level0/old/rih/bli_scal2rihs_mxn.h | 283 -- .../level0/old/rih/bli_scal2rihs_mxn_diag.h | 110 - .../level0/old/rih/bli_scal2rihs_mxn_uplo.h | 348 -- .../level0/old/rih/bli_setrihs_mxn_diag.h | 110 - frame/include/level0/old/ro/bli_scal2jros.h | 51 - frame/include/level0/old/ro/bli_scal2ros.h | 62 - frame/include/level0/old/rpi/bli_scal2jrpis.h | 53 - frame/include/level0/old/rpi/bli_scal2rpis.h | 66 - frame/include/level0/ri/bli_absq2ris.h | 63 - frame/include/level0/ri/bli_abval2ris.h | 80 - frame/include/level0/ri/bli_add3ris.h | 63 - frame/include/level0/ri/bli_addjris.h | 46 - frame/include/level0/ri/bli_addris.h | 63 - frame/include/level0/ri/bli_axmyris.h | 75 - frame/include/level0/ri/bli_axpbyjris.h | 91 - frame/include/level0/ri/bli_axpbyris.h | 91 - frame/include/level0/ri/bli_axpyjris.h | 169 - frame/include/level0/ri/bli_axpyris.h | 169 - frame/include/level0/ri/bli_conjris.h | 61 - frame/include/level0/ri/bli_copycjris.h | 69 - frame/include/level0/ri/bli_copyjris.h | 66 - frame/include/level0/ri/bli_copyris.h | 84 - frame/include/level0/ri/bli_eqris.h | 76 - frame/include/level0/ri/bli_invertris.h | 70 - frame/include/level0/ri/bli_invscaljris.h | 49 - 
frame/include/level0/ri/bli_invscalris.h | 84 - frame/include/level0/ri/bli_neg2ris.h | 63 - frame/include/level0/ri/bli_scal2jris.h | 173 - frame/include/level0/ri/bli_scal2ris.h | 173 - frame/include/level0/ri/bli_scal2ris_mxn.h | 173 - frame/include/level0/ri/bli_scalcjris.h | 77 - frame/include/level0/ri/bli_scaljris.h | 49 - frame/include/level0/ri/bli_scalris.h | 79 - .../include/level0/ri/bli_scalris_mxn_uplo.h | 110 - frame/include/level0/ri/bli_set0ris.h | 46 - frame/include/level0/ri/bli_sqrt2ris.h | 95 - frame/include/level0/ri/bli_subjris.h | 46 - frame/include/level0/ri/bli_subris.h | 63 - frame/include/level0/ri/bli_swapris.h | 77 - frame/include/level0/ri/bli_xpbyjris.h | 162 - frame/include/level0/ri/bli_xpbyris.h | 162 - frame/util/bli_util_check.c | 27 +- frame/util/bli_util_check.h | 4 +- frame/util/bli_util_fpa.c | 25 +- frame/util/bli_util_fpa.h | 4 +- frame/util/bli_util_ft.h | 6 +- frame/util/bli_util_oapi.c | 6 +- frame/util/bli_util_oapi.h | 4 +- frame/util/bli_util_tapi.c | 30 +- frame/util/bli_util_tapi.h | 12 +- frame/util/bli_util_unb_var1.c | 174 +- .../armsve/1m/bli_dpackm_armsve256_int_8x10.c | 10 +- .../1m/bli_dpackm_armsve512_asm_16x10.c | 10 +- kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c | 10 +- .../armv8a/1m/bli_packm_armv8a_int_s8x12.c | 10 +- kernels/bgq/1/bli_dotv_bgq_int.c | 2 +- kernels/bgq/1f/bli_axpyf_bgq_int.c | 16 +- .../haswell/1m/bli_packm_haswell_asm_c3x8.c | 10 +- .../haswell/1m/bli_packm_haswell_asm_d6x8.c | 10 +- .../haswell/1m/bli_packm_haswell_asm_s6x16.c | 10 +- .../haswell/1m/bli_packm_haswell_asm_z3x4.c | 10 +- .../sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c | 14 +- .../s6x16/bli_gemmsup_r_haswell_ref_sMx1.c | 14 +- kernels/knl/1m/bli_dpackm_knl_asm_24x8.c | 8 +- kernels/knl/1m/bli_spackm_knl_asm_24x16.c | 8 +- kernels/penryn/1/bli_dotv_penryn_int.c | 6 +- kernels/penryn/1f/bli_axpyf_penryn_int.c | 8 +- kernels/penryn/1f/bli_dotaxpyv_penryn_int.c | 4 +- kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c | 18 +- 
kernels/penryn/1f/bli_dotxf_penryn_int.c | 26 +- kernels/zen/1/bli_amaxv_zen_int.c | 44 +- kernels/zen/1/bli_axpyv_zen_int.c | 4 +- kernels/zen/1/bli_axpyv_zen_int10.c | 4 +- kernels/zen/1/bli_dotv_zen_int.c | 12 +- kernels/zen/1/bli_dotv_zen_int10.c | 12 +- kernels/zen/1/bli_dotxv_zen_int.c | 20 +- kernels/zen/1/bli_scalv_zen_int.c | 8 +- kernels/zen/1/bli_scalv_zen_int10.c | 8 +- kernels/zen/1/bli_swapv_zen_int8.c | 8 +- kernels/zen/1f/bli_axpyf_zen_int_4.c | 2 +- kernels/zen/1f/bli_axpyf_zen_int_5.c | 10 +- kernels/zen/1f/bli_axpyf_zen_int_8.c | 44 +- kernels/zen/1f/bli_dotxf_zen_int_8.c | 8 +- kernels/zen/3/bli_gemmt_small.c | 8 +- ref_kernels/1/bli_addv_ref.c | 8 +- ref_kernels/1/bli_amaxv_ref.c | 36 +- ref_kernels/1/bli_axpbyv_ref.c | 24 +- ref_kernels/1/bli_axpyv_ref.c | 12 +- ref_kernels/1/bli_copyv_ref.c | 8 +- ref_kernels/1/bli_dotv_ref.c | 16 +- ref_kernels/1/bli_dotxv_ref.c | 22 +- ref_kernels/1/bli_invertv_ref.c | 4 +- ref_kernels/1/bli_invscalv_ref.c | 10 +- ref_kernels/1/bli_scal2v_ref.c | 12 +- ref_kernels/1/bli_scalv_ref.c | 10 +- ref_kernels/1/bli_setv_ref.c | 12 +- ref_kernels/1/bli_subv_ref.c | 8 +- ref_kernels/1/bli_swapv_ref.c | 4 +- ref_kernels/1/bli_xpbyv_ref.c | 12 +- ref_kernels/1f/bli_axpy2v_ref.c | 24 +- ref_kernels/1f/bli_axpyf_ref.c | 12 +- ref_kernels/1f/bli_dotaxpyv_ref.c | 28 +- ref_kernels/1f/bli_dotxaxpyf_ref.c | 34 +- ref_kernels/1f/bli_dotxf_ref.c | 18 +- ref_kernels/1m/bli_packm_cxc_diag_1er_ref.c | 74 +- ref_kernels/1m/bli_packm_cxc_diag_ref.c | 40 +- ref_kernels/1m/bli_packm_cxc_diag_ro_ref.c | 27 +- ref_kernels/1m/bli_packm_cxk_1er_ref.c | 28 +- ref_kernels/1m/bli_packm_cxk_ref.c | 50 +- ref_kernels/1m/bli_packm_cxk_ro_ref.c | 13 +- ref_kernels/1m/bli_unpackm_cxk_ref.c | 6 +- ref_kernels/3/bli_gemm_ref.c | 72 +- ref_kernels/3/bli_gemmsup_ref.c | 116 +- ref_kernels/3/bli_gemmtrsm_ref.c | 10 +- ref_kernels/3/bli_trsm_ref.c | 24 +- ref_kernels/ind/bli_gemm1m_ref.c | 7 +- ref_kernels/ind/bli_gemm_ccr_ref.c | 7 +- 
ref_kernels/ind/bli_gemm_crr_ref.c | 39 +- ref_kernels/ind/bli_gemm_rcc_ref.c | 3 +- ref_kernels/ind/bli_gemmtrsm1m_ref.c | 168 +- sandbox/gemmlike/bls_l3_packm_var1.c | 4 +- test/level0/Makefile | 160 + test/level0/bli_unit_testing.h | 876 +++++ test/level0/test_l0.cxx | 3167 +++++++++++++++++ testsuite/src/test_amaxv.c | 22 +- testsuite/src/test_libblis.c | 4 +- testsuite/src/test_randm.c | 8 +- testsuite/src/test_setm.c | 6 +- testsuite/src/test_setv.c | 14 +- 335 files changed, 12249 insertions(+), 19265 deletions(-) create mode 100644 frame/include/bli_cast_macro_defs.h delete mode 100644 frame/include/level0/1e/bli_copy1es.h delete mode 100644 frame/include/level0/1e/bli_copyj1es.h delete mode 100644 frame/include/level0/1e/bli_invert1es.h delete mode 100644 frame/include/level0/1e/bli_scal1es.h delete mode 100644 frame/include/level0/1e/bli_scal21es.h delete mode 100644 frame/include/level0/1e/bli_scal2j1es.h delete mode 100644 frame/include/level0/1m/bli_invert1ms_mxn_diag.h delete mode 100644 frame/include/level0/1m/bli_scal1ms_mxn.h delete mode 100644 frame/include/level0/1m/bli_scal21ms_mxn.h delete mode 100644 frame/include/level0/1m/bli_scal21ms_mxn_diag.h delete mode 100644 frame/include/level0/1m/bli_scal21ms_mxn_uplo.h delete mode 100644 frame/include/level0/1m/bli_set1ms_mxn.h delete mode 100644 frame/include/level0/1m/bli_set1ms_mxn_diag.h delete mode 100644 frame/include/level0/1m/bli_set1ms_mxn_uplo.h delete mode 100644 frame/include/level0/1m/bli_seti01ms_mxn_diag.h delete mode 100644 frame/include/level0/1r/bli_copy1rs.h delete mode 100644 frame/include/level0/1r/bli_copyj1rs.h delete mode 100644 frame/include/level0/1r/bli_invert1rs.h delete mode 100644 frame/include/level0/1r/bli_scal1rs.h delete mode 100644 frame/include/level0/bb/bli_bcastbbs_mxn.h delete mode 100644 frame/include/level0/bb/bli_scal2bbs_mxn.h delete mode 100644 frame/include/level0/bb/bli_set0bbs_mxn.h delete mode 100644 frame/include/level0/bli_absq2s.h delete mode 100644 
frame/include/level0/bli_abval2s.h delete mode 100644 frame/include/level0/bli_add3s.h delete mode 100644 frame/include/level0/bli_addjs.h delete mode 100644 frame/include/level0/bli_adds.h delete mode 100644 frame/include/level0/bli_adds_mxn.h delete mode 100644 frame/include/level0/bli_adds_mxn_uplo.h rename frame/include/level0/{old/ri3/bli_copyri3s.h => bli_assigns.h} (77%) delete mode 100644 frame/include/level0/bli_axmys.h delete mode 100644 frame/include/level0/bli_axpbyjs.h delete mode 100644 frame/include/level0/bli_axpbys.h delete mode 100644 frame/include/level0/bli_axpbys_mxn.h delete mode 100644 frame/include/level0/bli_axpyjs.h delete mode 100644 frame/include/level0/bli_axpys.h rename frame/include/level0/{bli_lt.h => bli_complex_terms.h} (60%) delete mode 100644 frame/include/level0/bli_conjs.h delete mode 100644 frame/include/level0/bli_copycjs.h delete mode 100644 frame/include/level0/bli_copyjnzs.h delete mode 100644 frame/include/level0/bli_copyjs.h delete mode 100644 frame/include/level0/bli_copynzs.h delete mode 100644 frame/include/level0/bli_copys.h delete mode 100644 frame/include/level0/bli_copys_mxn.h rename frame/include/level0/{bli_inverts.h => bli_declinits.h} (64%) delete mode 100644 frame/include/level0/bli_dotjs.h delete mode 100644 frame/include/level0/bli_dots.h delete mode 100644 frame/include/level0/bli_eq.h delete mode 100644 frame/include/level0/bli_gets.h delete mode 100644 frame/include/level0/bli_invscaljs.h delete mode 100644 frame/include/level0/bli_invscals.h delete mode 100644 frame/include/level0/bli_lte.h delete mode 100644 frame/include/level0/bli_neg2s.h delete mode 100644 frame/include/level0/bli_randnp2s.h delete mode 100644 frame/include/level0/bli_rands.h delete mode 100644 frame/include/level0/bli_scal2js.h delete mode 100644 frame/include/level0/bli_scal2s.h delete mode 100644 frame/include/level0/bli_scal2s_mxn.h delete mode 100644 frame/include/level0/bli_scalcjs.h delete mode 100644 
frame/include/level0/bli_scaljs.h delete mode 100644 frame/include/level0/bli_scals.h delete mode 100644 frame/include/level0/bli_set0s.h delete mode 100644 frame/include/level0/bli_set0s_edge.h delete mode 100644 frame/include/level0/bli_set0s_mxn.h delete mode 100644 frame/include/level0/bli_set1s.h delete mode 100644 frame/include/level0/bli_seti0s.h delete mode 100644 frame/include/level0/bli_setis.h delete mode 100644 frame/include/level0/bli_sets.h delete mode 100644 frame/include/level0/bli_sqrt2s.h delete mode 100644 frame/include/level0/bli_subjs.h delete mode 100644 frame/include/level0/bli_subs.h delete mode 100644 frame/include/level0/bli_swaps.h create mode 100644 frame/include/level0/bli_tabsq2s.h create mode 100644 frame/include/level0/bli_tabval2s.h create mode 100644 frame/include/level0/bli_tadd3s.h create mode 100644 frame/include/level0/bli_tadds.h create mode 100644 frame/include/level0/bli_taxpbys.h create mode 100644 frame/include/level0/bli_taxpys.h rename frame/include/level0/{1r/bli_scal21rs.h => bli_tconjs.h} (60%) create mode 100644 frame/include/level0/bli_tcopycjs.h create mode 100644 frame/include/level0/bli_tcopynzs.h create mode 100644 frame/include/level0/bli_tcopys.h create mode 100644 frame/include/level0/bli_tdots.h create mode 100644 frame/include/level0/bli_teqs.h rename frame/include/level0/{bli_fprints.h => bli_tfprints.h} (52%) rename frame/include/level0/{bli_setrs.h => bli_tgets.h} (57%) create mode 100644 frame/include/level0/bli_tinverts.h create mode 100644 frame/include/level0/bli_tinvscals.h rename frame/include/level0/{old/ri3/bli_scal2ri3s.h => bli_tneg2s.h} (53%) rename frame/include/level0/{1r/bli_scal2j1rs.h => bli_trandnp2s.h} (60%) rename frame/include/level0/{old/ri3/bli_scal2jri3s.h => bli_trands.h} (60%) create mode 100644 frame/include/level0/bli_tscal2s.h create mode 100644 frame/include/level0/bli_tscalcjs.h create mode 100644 frame/include/level0/bli_tscals.h create mode 100644 
frame/include/level0/bli_tsets.h create mode 100644 frame/include/level0/bli_tsqrt2s.h create mode 100644 frame/include/level0/bli_tsubs.h create mode 100644 frame/include/level0/bli_tswaps.h create mode 100644 frame/include/level0/bli_txpbys.h delete mode 100644 frame/include/level0/bli_xpbyjs.h delete mode 100644 frame/include/level0/bli_xpbys.h delete mode 100644 frame/include/level0/bli_xpbys_mxn.h delete mode 100644 frame/include/level0/bli_xpbys_mxn_uplo.h delete mode 100644 frame/include/level0/old/bli_cast.h delete mode 100644 frame/include/level0/old/bli_castfrom.h delete mode 100644 frame/include/level0/old/bli_castto.h delete mode 100644 frame/include/level0/old/bli_copynzjs.h delete mode 100644 frame/include/level0/old/bli_copynzs.h delete mode 100644 frame/include/level0/old/bli_invscalcjs.h delete mode 100644 frame/include/level0/old/bli_scalcjs.h delete mode 100644 frame/include/level0/old/bli_set0ris_mxn.h delete mode 100644 frame/include/level0/old/io/bli_scal2ios.h delete mode 100644 frame/include/level0/old/io/bli_scal2jios.h delete mode 100644 frame/include/level0/old/ri3/bli_copyjri3s.h delete mode 100644 frame/include/level0/old/ri3/bli_scal2ri3s_mxn.h delete mode 100644 frame/include/level0/old/rih/bli_scal2rihs_mxn.h delete mode 100644 frame/include/level0/old/rih/bli_scal2rihs_mxn_diag.h delete mode 100644 frame/include/level0/old/rih/bli_scal2rihs_mxn_uplo.h delete mode 100644 frame/include/level0/old/rih/bli_setrihs_mxn_diag.h delete mode 100644 frame/include/level0/old/ro/bli_scal2jros.h delete mode 100644 frame/include/level0/old/ro/bli_scal2ros.h delete mode 100644 frame/include/level0/old/rpi/bli_scal2jrpis.h delete mode 100644 frame/include/level0/old/rpi/bli_scal2rpis.h delete mode 100644 frame/include/level0/ri/bli_absq2ris.h delete mode 100644 frame/include/level0/ri/bli_abval2ris.h delete mode 100644 frame/include/level0/ri/bli_add3ris.h delete mode 100644 frame/include/level0/ri/bli_addjris.h delete mode 100644 
frame/include/level0/ri/bli_addris.h delete mode 100644 frame/include/level0/ri/bli_axmyris.h delete mode 100644 frame/include/level0/ri/bli_axpbyjris.h delete mode 100644 frame/include/level0/ri/bli_axpbyris.h delete mode 100644 frame/include/level0/ri/bli_axpyjris.h delete mode 100644 frame/include/level0/ri/bli_axpyris.h delete mode 100644 frame/include/level0/ri/bli_conjris.h delete mode 100644 frame/include/level0/ri/bli_copycjris.h delete mode 100644 frame/include/level0/ri/bli_copyjris.h delete mode 100644 frame/include/level0/ri/bli_copyris.h delete mode 100644 frame/include/level0/ri/bli_eqris.h delete mode 100644 frame/include/level0/ri/bli_invertris.h delete mode 100644 frame/include/level0/ri/bli_invscaljris.h delete mode 100644 frame/include/level0/ri/bli_invscalris.h delete mode 100644 frame/include/level0/ri/bli_neg2ris.h delete mode 100644 frame/include/level0/ri/bli_scal2jris.h delete mode 100644 frame/include/level0/ri/bli_scal2ris.h delete mode 100644 frame/include/level0/ri/bli_scal2ris_mxn.h delete mode 100644 frame/include/level0/ri/bli_scalcjris.h delete mode 100644 frame/include/level0/ri/bli_scaljris.h delete mode 100644 frame/include/level0/ri/bli_scalris.h delete mode 100644 frame/include/level0/ri/bli_scalris_mxn_uplo.h delete mode 100644 frame/include/level0/ri/bli_set0ris.h delete mode 100644 frame/include/level0/ri/bli_sqrt2ris.h delete mode 100644 frame/include/level0/ri/bli_subjris.h delete mode 100644 frame/include/level0/ri/bli_subris.h delete mode 100644 frame/include/level0/ri/bli_swapris.h delete mode 100644 frame/include/level0/ri/bli_xpbyjris.h delete mode 100644 frame/include/level0/ri/bli_xpbyris.h create mode 100644 test/level0/Makefile create mode 100644 test/level0/bli_unit_testing.h create mode 100644 test/level0/test_l0.cxx diff --git a/blastest/src/dblat1.c b/blastest/src/dblat1.c index e848671787..ccac12c88a 100644 --- a/blastest/src/dblat1.c +++ b/blastest/src/dblat1.c @@ -1044,7 +1044,7 @@ static real c_b81 = 0.f; 
/* Local variables */ real sd; - extern real s_epsilon_(); + extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___125 = { 0, 6, 0, fmt_99999, 0 }; diff --git a/build/libblis-symbols.def b/build/libblis-symbols.def index 190dea77c2..7262888789 100644 --- a/build/libblis-symbols.def +++ b/build/libblis-symbols.def @@ -121,7 +121,7 @@ bli_cger_ex bli_cgetijm bli_cgetijv bli_cgetsc -bli_cgtesc +bli_cgesc bli_cgtsc bli_check_error_code_helper bli_chemm @@ -149,7 +149,7 @@ bli_cinvscalv bli_cinvscalv_ex bli_clock bli_clock_min_diff -bli_cltesc +bli_clesc bli_cltsc bli_cmachval bli_cmkherm @@ -351,7 +351,7 @@ bli_dger_ex bli_dgetijm bli_dgetijv bli_dgetsc -bli_dgtesc +bli_dgesc bli_dgtsc bli_dhemm bli_dhemm_ex @@ -377,7 +377,7 @@ bli_dinvscalm_ex bli_dinvscalv bli_dinvscalv_ex bli_divsc -bli_dltesc +bli_dlesc bli_dltsc bli_dmachval bli_dmkherm @@ -532,7 +532,7 @@ bli_gks_l3_ukr_impl_type bli_gks_query_cntx bli_gks_query_ind_cntx bli_gks_query_nat_cntx -bli_gtesc +bli_gesc bli_gtsc bli_hemm bli_hemm_ex @@ -630,7 +630,7 @@ bli_iprintm bli_iprintv bli_isetsc bli_l3_thrinfo_create -bli_ltesc +bli_lesc bli_ltsc bli_machval bli_malloc_user @@ -824,7 +824,7 @@ bli_sger_ex bli_sgetijm bli_sgetijv bli_sgetsc -bli_sgtesc +bli_sgesc bli_sgtsc bli_shemm bli_shemm_ex @@ -852,7 +852,7 @@ bli_sinvscalm_ex bli_sinvscalv bli_sinvscalv_ex bli_sleep -bli_sltesc +bli_slesc bli_sltsc bli_smachval bli_smkherm @@ -1097,7 +1097,7 @@ bli_zger_ex bli_zgetijm bli_zgetijv bli_zgetsc -bli_zgtesc +bli_zgesc bli_zgtsc bli_zhemm bli_zhemm_ex @@ -1123,7 +1123,7 @@ bli_zinvscalm_ex bli_zinvscalv bli_zinvscalv_ex bli_zipsc -bli_zltesc +bli_zlesc bli_zltsc bli_zmachval bli_zmkherm diff --git a/build/plugin/my_kernel_1_ref.c b/build/plugin/my_kernel_1_ref.c index dc34338140..42fa593c10 100644 --- a/build/plugin/my_kernel_1_ref.c +++ b/build/plugin/my_kernel_1_ref.c @@ -48,7 +48,7 @@ void PASTEMAC(ch,opname,arch,suf) \ \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copys)( *a, x[ 
i ] ); \ + bli_tcopys( ch,ch, *a, x[ i ] ); \ } \ } diff --git a/build/plugin/my_kernel_2_ref.c b/build/plugin/my_kernel_2_ref.c index 27aa1e96ba..0d241b5c1a 100644 --- a/build/plugin/my_kernel_2_ref.c +++ b/build/plugin/my_kernel_2_ref.c @@ -56,7 +56,7 @@ void PASTEMAC(ch,opname,arch,suf) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ - PASTEMAC(ch,seti0s)( a[ i*n + j ] ); \ + bli_tseti0s( ch, a[ i*n + j ] ); \ } \ } \ } \ @@ -66,7 +66,7 @@ void PASTEMAC(ch,opname,arch,suf) \ { \ for ( dim_t j = 0; j < n; ++j ) \ { \ - PASTEMAC(ch,seti0s)( a[ i + j*m ] ); \ + bli_tseti0s( ch, a[ i + j*m ] ); \ } \ } \ } \ diff --git a/config/template/kernels/1/bli_axpyv_template_noopt_var1.c b/config/template/kernels/1/bli_axpyv_template_noopt_var1.c index 8796bab267..511514aeab 100644 --- a/config/template/kernels/1/bli_axpyv_template_noopt_var1.c +++ b/config/template/kernels/1/bli_axpyv_template_noopt_var1.c @@ -117,7 +117,7 @@ void bli_zaxpyv_template_noopt if ( bli_zero_dim1( n ) ) return; - if ( bli_zeq0( *alpha ) ) return; + if ( bli_teq0s( z, *alpha ) ) return; // If there is anything that would interfere with our use of aligned diff --git a/config/template/kernels/1/bli_dotv_template_noopt_var1.c b/config/template/kernels/1/bli_dotv_template_noopt_var1.c index 90f93b8177..60ebf3e146 100644 --- a/config/template/kernels/1/bli_dotv_template_noopt_var1.c +++ b/config/template/kernels/1/bli_dotv_template_noopt_var1.c @@ -261,6 +261,6 @@ void bli_zdotv_template_noopt if ( bli_is_conj( conjy ) ) bli_zconjs( dotxy ); - bli_zcopys( dotxy, *rho ); + bli_tcopys( z,z, dotxy, *rho ); } diff --git a/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c b/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c index f7b4922864..6a40ed3554 100644 --- a/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c +++ b/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c @@ -209,7 +209,7 @@ void bli_zaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zcopys( *xp[ j ], 
alpha_x[ j ] ); + bli_tcopys( z,z, *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } @@ -217,7 +217,7 @@ void bli_zaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zcopyjs( *xp[ j ], alpha_x[ j ] ); + bli_tcopyjs( z,z, *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } diff --git a/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c b/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c index 31a3097c0e..ca1076e3ec 100644 --- a/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c +++ b/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c @@ -363,6 +363,6 @@ void bli_zdotaxpyv_template_noopt if ( bli_is_conj( conjy ) ) bli_zconjs( dotxy ); - bli_zcopys( dotxy, *rho ); + bli_tcopys( z,z, dotxy, *rho ); } diff --git a/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c b/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c index aeb502f354..2667d92722 100644 --- a/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c +++ b/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c @@ -238,7 +238,7 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zcopys( *xp[ j ], alpha_x[ j ] ); + bli_tcopys( z,z, *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } @@ -246,7 +246,7 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zcopyjs( *xp[ j ], alpha_x[ j ] ); + bli_tcopyjs( z,z, *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } diff --git a/config/template/kernels/3/bli_gemm_template_noopt_mxn.c b/config/template/kernels/3/bli_gemm_template_noopt_mxn.c index 190519fa0a..97a924b0a5 100644 --- a/config/template/kernels/3/bli_gemm_template_noopt_mxn.c +++ b/config/template/kernels/3/bli_gemm_template_noopt_mxn.c @@ -98,7 +98,7 @@ void bli_zgemm_template_noopt /* Initialize the accumulator elements in ab to zero. 
*/ for ( i = 0; i < mr * nr; ++i ) { - bli_zset0s( *(ab + i) ); + bli_tset0s( z, *(ab + i) ); } /* Perform a series of k rank-1 updates into ab. */ @@ -116,7 +116,7 @@ void bli_zgemm_template_noopt { ai = *(a1 + i); - bli_zdots( ai, bj, *abij ); + bli_tdots( z,z,z,z, ai, bj, *abij ); abij += rs_ab; } @@ -129,16 +129,17 @@ void bli_zgemm_template_noopt /* Scale each element of ab by alpha. */ for ( i = 0; i < mr * nr; ++i ) { - bli_zscals( *alpha, *(ab + i) ); + bli_tscals( z,z,z, *alpha, *(ab + i) ); } /* If beta is zero, overwrite c11 with the scaled result in ab. Otherwise, scale c11 by beta and then add the scaled result in ab. */ - if ( bli_zeq0( *beta ) ) + if ( bli_teq0s( z, *beta ) ) { /* c11 := ab */ - bli_zcopys_mxn( m, + bli_tcopys_mxn( z,z, + m, n, ab, rs_ab, cs_ab, c11, rs_c, cs_c ); @@ -146,7 +147,8 @@ void bli_zgemm_template_noopt else { /* c11 := beta * c11 + ab */ - bli_zxpbys_mxn( m, + bli_txpbys_mxn( z,z,z,z, + m, n, ab, rs_ab, cs_ab, beta, diff --git a/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c b/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c index 4e6634dea3..0a963a2d8e 100644 --- a/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c +++ b/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c @@ -134,7 +134,7 @@ void bli_ztrsm_l_template_noopt bli_zscals( *alpha11, *chi11 ); /* Output final result to matrix C. */ - bli_zcopys( *chi11, *gamma11 ); + bli_tcopys( z,z, *chi11, *gamma11 ); } } } diff --git a/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c b/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c index 42982459ad..c65c5e3523 100644 --- a/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c +++ b/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c @@ -134,7 +134,7 @@ void bli_ztrsm_u_template_noopt bli_zscals( *alpha11, *chi11 ); /* Output final result to matrix C. 
*/ - bli_zcopys( *chi11, *gamma11 ); + bli_tcopys( z,z, *chi11, *gamma11 ); } } } diff --git a/docs/Multithreading.md b/docs/Multithreading.md index d8f8b13f40..f9ba29dad3 100644 --- a/docs/Multithreading.md +++ b/docs/Multithreading.md @@ -38,7 +38,7 @@ To summarize: In order to observe multithreaded parallelism within a BLIS operat BLIS disables multithreading by default. In order to allow multithreaded parallelism from BLIS, you must first enable multithreading explicitly at configure-time. -As of this writing, BLIS optionally supports multithreading via OpenMP or POSIX threads(or both). +As of this writing, BLIS optionally supports multithreading via OpenMP or POSIX threads (or both). To enable multithreading via OpenMP, you must provide the `--enable-threading` option to the `configure` script: ``` diff --git a/frame/0/bli_l0_tapi.c b/frame/0/bli_l0_tapi.c index ef2a942f3d..d972eacded 100644 --- a/frame/0/bli_l0_tapi.c +++ b/frame/0/bli_l0_tapi.c @@ -52,8 +52,8 @@ void PASTEMAC(ch,opname) \ \ ctype chi_conj; \ \ - PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ - PASTEMAC(ch,kername)( chi_conj, *psi ); \ + bli_tcopycjs( ch,ch, conjchi, *chi, chi_conj ); \ + PASTEMAC(t,kername)( ch,ch,ch, chi_conj, *psi ); \ } INSERT_GENTFUNC_BASIC( addsc, adds ) @@ -75,9 +75,9 @@ void PASTEMAC(ch,opname) \ \ ctype chi_conj; \ \ - PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ - PASTEMAC(ch,kername)( chi_conj ); \ - PASTEMAC(ch,copys)( chi_conj, *psi ); \ + bli_tcopycjs( ch,ch, conjchi, *chi, chi_conj ); \ + PASTEMAC(t,kername)( ch,ch, chi_conj ); \ + bli_tcopys( ch,ch, chi_conj, *psi ); \ } INSERT_GENTFUNC_BASIC( invertsc, inverts ) @@ -95,17 +95,17 @@ void PASTEMAC(ch,opname) \ { \ bli_init_once(); \ \ - if ( PASTEMAC(ch,eq0)( *chi ) ) \ + if ( bli_teq0s( ch, *chi ) ) \ { \ /* Overwrite potential Infs and NaNs. 
*/ \ - PASTEMAC(ch,set0s)( *psi ); \ + bli_tset0s( ch, *psi ); \ } \ else \ { \ ctype chi_conj; \ \ - PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ - PASTEMAC(ch,kername)( chi_conj, *psi ); \ + bli_tcopycjs( ch,ch, conjchi, *chi, chi_conj ); \ + PASTEMAC(t,kername)( ch,ch,ch, chi_conj, *psi ); \ } \ } @@ -129,11 +129,11 @@ void PASTEMAC(ch,opname) \ \ ( void )absq_i; \ \ - PASTEMAC(ch,chr,gets)( *chi, chi_r, chi_i ); \ + bli_tgets( ch,chr, *chi, chi_r, chi_i ); \ \ /* absq = chi_r * chi_r + chi_i * chi_i; \ absq_r = 0.0; (thrown away) */ \ - PASTEMAC(ch,absq2ris)( chi_r, chi_i, *absq, absq_i ); \ + bli_tabsq2ris( ch,ch,ch, chi_r, chi_i, *absq, absq_i ); \ \ ( void )chi_i; \ } @@ -153,7 +153,7 @@ void PASTEMAC(ch,opname) \ bli_init_once(); \ \ /* norm = sqrt( chi_r * chi_r + chi_i * chi_i ); */ \ - PASTEMAC(ch,chr,abval2s)( *chi, *norm ); \ + bli_tabval2s( ch,chr,chr, *chi, *norm ); \ } INSERT_GENTFUNCR_BASIC( normfsc ) @@ -171,7 +171,7 @@ void PASTEMAC(ch,opname) \ bli_init_once(); \ \ /* NOTE: sqrtsc/sqrt2s differs from normfsc/abval2s in the complex domain. 
*/ \ - PASTEMAC(ch,sqrt2s)( *chi, *psi ); \ + bli_tsqrt2s( ch,ch,ch, *chi, *psi ); \ } INSERT_GENTFUNC_BASIC( sqrtsc ) @@ -190,7 +190,7 @@ void PASTEMAC(ch,opname) \ \ const ctype_r chi_r = PASTEMAC(ch,real)( *chi ); \ \ - PASTEMAC(chr,ch,sqrt2s)( chi_r, *psi ); \ + bli_tsqrt2s( chr,ch,chr, chi_r, *psi ); \ } INSERT_GENTFUNCR_BASIC( sqrtrsc ) @@ -208,7 +208,7 @@ void PASTEMAC(ch,opname) \ { \ bli_init_once(); \ \ - PASTEMAC(ch,d,gets)( *chi, *zeta_r, *zeta_i ); \ + bli_tgets( ch,d, *chi, *zeta_r, *zeta_i ); \ } INSERT_GENTFUNC_BASIC( getsc ) @@ -226,7 +226,7 @@ void PASTEMAC(ch,opname) \ { \ bli_init_once(); \ \ - PASTEMAC(d,ch,sets)( zeta_r, zeta_i, *chi ); \ + bli_tsets( d,ch, zeta_r, zeta_i, *chi ); \ } INSERT_GENTFUNC_BASIC( setsc ) @@ -244,7 +244,7 @@ void PASTEMAC(ch,opname) \ { \ bli_init_once(); \ \ - PASTEMAC(ch,chr,gets)( *chi, *zeta_r, *zeta_i ); \ + bli_tgets( ch,chr, *chi, *zeta_r, *zeta_i ); \ } INSERT_GENTFUNCR_BASIC( unzipsc ) @@ -262,7 +262,7 @@ void PASTEMAC(ch,opname) \ { \ bli_init_once(); \ \ - PASTEMAC(chr,ch,sets)( *zeta_r, *zeta_i, *chi ); \ + bli_tsets( chr,ch, *zeta_r, *zeta_i, *chi ); \ } INSERT_GENTFUNCR_BASIC( zipsc ) @@ -278,7 +278,7 @@ void bli_igetsc { bli_init_once(); - PASTEMAC(i,d,gets)( *chi, *zeta_r, *zeta_i ); + bli_tgets( i,d, *chi, *zeta_r, *zeta_i ); } void bli_isetsc @@ -290,6 +290,6 @@ void bli_isetsc { bli_init_once(); - PASTEMAC(d,i,sets)( zeta_r, zeta_i, *chi ); + bli_tsets( d,i, zeta_r, zeta_i, *chi ); } diff --git a/frame/0/copysc/bli_copysc.c b/frame/0/copysc/bli_copysc.c index 11e1115441..77d06f6100 100644 --- a/frame/0/copysc/bli_copysc.c +++ b/frame/0/copysc/bli_copysc.c @@ -117,11 +117,11 @@ void PASTEMAC(chx,chy,varname) \ \ if ( bli_is_conj( conjchi ) ) \ { \ - PASTEMAC(chx,chy,copyjs)( *chi_cast, *psi_cast ); \ + bli_tcopyjs( chx,chy, *chi_cast, *psi_cast ); \ } \ else \ { \ - PASTEMAC(chx,chy,copys)( *chi_cast, *psi_cast ); \ + bli_tcopys( chx,chy, *chi_cast, *psi_cast ); \ } \ } diff --git 
a/frame/1d/bli_l1d_tapi.c b/frame/1d/bli_l1d_tapi.c index 17e7fcd3bb..47737872f3 100644 --- a/frame/1d/bli_l1d_tapi.c +++ b/frame/1d/bli_l1d_tapi.c @@ -363,7 +363,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ { \ ctype* chi11 = x1 + (i )*incx; \ \ - PASTEMAC(ch,setis)( *alpha, *chi11 ); \ + bli_tsetis( ch,ch, *alpha, *chi11 ); \ } */ \ \ /* Acquire the address of the imaginary component of the first element, diff --git a/frame/1m/bli_l1m_tapi.c b/frame/1m/bli_l1m_tapi.c index d17df0eb7a..775f450958 100644 --- a/frame/1m/bli_l1m_tapi.c +++ b/frame/1m/bli_l1m_tapi.c @@ -208,7 +208,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* If alpha is zero, then the entire operation is a no-op. */ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_teq0s( ch, *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ @@ -288,7 +288,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ /* If alpha is zero, then we set the output matrix to zero. This seemingly minor optimization is important because it will clear any NaNs and Infs in x that would otherwise propogate. */ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_teq0s( ch, *alpha ) ) \ { \ \ PASTEMAC(ch,setm,BLIS_TAPI_EX_SUF) \ @@ -429,7 +429,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If beta is zero, then the operation reduces to copym. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ PASTEMAC(ch,copym,_unb_var1) \ ( \ @@ -520,7 +520,7 @@ void PASTEMAC(chx,chy,opname,EX_SUF) \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If beta is zero, then the operation reduces to copym. 
*/ \ - if ( PASTEMAC(chy,eq0)( *beta ) ) \ + if ( bli_teq0s( chy, *beta ) ) \ { \ PASTEMAC(chx,chy,castm) \ ( \ diff --git a/frame/1m/bli_l1m_unb_var1.c b/frame/1m/bli_l1m_unb_var1.c index 749e372bb0..fc790ed773 100644 --- a/frame/1m/bli_l1m_unb_var1.c +++ b/frame/1m/bli_l1m_unb_var1.c @@ -532,7 +532,7 @@ void PASTEMAC(chx,chy,opname) \ /*conjx = bli_extract_conj( transx );*/ \ \ /* Handle dense and upper/lower storage cases separately. */ \ - if ( PASTEMAC(chy,eq1)( *beta ) ) \ + if ( bli_teq1s( chy, *beta ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ @@ -545,7 +545,7 @@ void PASTEMAC(chx,chy,opname) \ \ for ( dim_t i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(chx,chy,adds)( x1[i], y1[i] ); \ + bli_tadds( chx,chy,chy, x1[i], y1[i] ); \ } \ } \ } \ @@ -563,7 +563,7 @@ void PASTEMAC(chx,chy,opname) \ \ for ( dim_t i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(chx,chy,adds)( *chi1, *psi1 ); \ + bli_tadds( chx,chy,chy, *chi1, *psi1 ); \ \ chi1 += incx; \ psi1 += incy; \ @@ -571,7 +571,7 @@ void PASTEMAC(chx,chy,opname) \ } \ } \ } \ - else /* ( !PASTEMAC(chy,eq1)( *beta ) ) */ \ + else /* ( !bli_teq1s( chy, *beta ) ) */ \ { \ if ( incx == 1 && incy == 1 ) \ { \ @@ -584,7 +584,7 @@ void PASTEMAC(chx,chy,opname) \ \ for ( dim_t i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(chx,chy,chy,xpbys)( x1[i], *beta, y1[i] ); \ + bli_txpbys( chx,chy,chy,chy, x1[i], *beta, y1[i] ); \ } \ } \ } \ @@ -602,7 +602,7 @@ void PASTEMAC(chx,chy,opname) \ \ for ( dim_t i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(chx,chy,chy,xpbys)( *chi1, *beta, *psi1 ); \ + bli_txpbys( chx,chy,chy,chy, *chi1, *beta, *psi1 ); \ \ chi1 += incx; \ psi1 += incy; \ diff --git a/frame/1m/packm/bli_packm_struc_cxk.c b/frame/1m/packm/bli_packm_struc_cxk.c index 74f9de8f85..0791de524e 100644 --- a/frame/1m/packm/bli_packm_struc_cxk.c +++ b/frame/1m/packm/bli_packm_struc_cxk.c @@ -95,8 +95,8 @@ void PASTEMAC(chc,chp,varname) \ { \ ctypep_r kappa_r, kappa_i; \ ( void )kappa_r; \ - PASTEMAC(chp,gets)( *( ctypep* )kappa, kappa_r, 
kappa_i ); \ - if ( PASTEMAC(chp_r,eq0)( kappa_i ) ) \ + bli_tgets( chp,chp, *( ctypep* )kappa, kappa_r, kappa_i ); \ + if ( bli_teq0s( chp_r, kappa_i ) ) \ { \ /* Treat the matrix as real with doubled strides. */ \ dt_c = bli_dt_proj_to_real( dt_c ); \ diff --git a/frame/2/bli_l2_tapi.c b/frame/2/bli_l2_tapi.c index f6f2a035d4..4b91172319 100644 --- a/frame/2/bli_l2_tapi.c +++ b/frame/2/bli_l2_tapi.c @@ -74,7 +74,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ \ /* If x has zero elements, or if alpha is zero, scale y by beta and return early. */ \ - if ( bli_zero_dim1( n_x ) || PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_zero_dim1( n_x ) || bli_teq0s( ch, *alpha ) ) \ { \ PASTEMAC(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ @@ -144,7 +144,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ BLIS_TAPI_EX_DECLS \ \ /* If x or y has zero elements, or if alpha is zero, return early. */ \ - if ( bli_zero_dim2( m, n ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_zero_dim2( m, n ) || bli_teq0s( ch, *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ @@ -201,7 +201,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ \ /* If x has zero elements, or if alpha is zero, scale y by beta and return early. */ \ - if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_zero_dim1( m ) || bli_teq0s( ch, *alpha ) ) \ { \ PASTEMAC(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ @@ -273,12 +273,12 @@ void PASTEMAC(ch,opname,EX_SUF) \ ctype alpha_local; \ \ /* If x has zero elements, or if alpha is zero, return early. */ \ - if ( bli_zero_dim1( m ) || PASTEMAC(chr,eq0)( *alpha ) ) return; \ + if ( bli_zero_dim1( m ) || bli_teq0s( chr, *alpha ) ) return; \ \ /* Make a local copy of alpha, cast into the complex domain. This allows us to use the same underlying her variants to implement both her and syr operations. 
*/ \ - PASTEMAC(chr,ch,copys)( *alpha, alpha_local ); \ + bli_tcopys( chr,ch, *alpha, alpha_local ); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ @@ -335,7 +335,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ BLIS_TAPI_EX_DECLS \ \ /* If x has zero elements, or if alpha is zero, return early. */ \ - if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_zero_dim1( m ) || bli_teq0s( ch, *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ @@ -394,7 +394,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ BLIS_TAPI_EX_DECLS \ \ /* If x has zero elements, or if alpha is zero, return early. */ \ - if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_zero_dim1( m ) || bli_teq0s( ch, *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ @@ -461,7 +461,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If alpha is zero, set x to zero and return early. */ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_teq0s( ch, *alpha ) ) \ { \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ diff --git a/frame/2/gemv/amd/bli_gemv_unf_var2_amd.c b/frame/2/gemv/amd/bli_gemv_unf_var2_amd.c index b80916adc0..8a32c277e6 100644 --- a/frame/2/gemv/amd/bli_gemv_unf_var2_amd.c +++ b/frame/2/gemv/amd/bli_gemv_unf_var2_amd.c @@ -82,7 +82,7 @@ void PASTEMAC(ch,varname) \ ); \ \ /* If alpha == 0, then we are done. */ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_teq0s( ch, *alpha ) ) return; \ \ /* Query the context for the kernel function pointer and fusing factor. */ \ /*axpyf_ker_ft kfp_af = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPYF_KER, cntx );*/ \ @@ -158,7 +158,7 @@ void PASTEMAC(ch,varname) \ conja = bli_extract_conj( transa ); \ \ /* If beta is zero, use setv. Otherwise, scale by beta. 
*/ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ diff --git a/frame/2/gemv/bli_gemv_unb_var2.c b/frame/2/gemv/bli_gemv_unb_var2.c index f40bb2dab1..c512bff7f5 100644 --- a/frame/2/gemv/bli_gemv_unb_var2.c +++ b/frame/2/gemv/bli_gemv_unb_var2.c @@ -70,7 +70,7 @@ void PASTEMAC(ch,varname) \ conja = bli_extract_conj( transa ); \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -107,8 +107,8 @@ void PASTEMAC(ch,varname) \ y1 = y + (0 )*incy; \ \ /* y = y + alpha * chi1 * a1; */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, alpha_chi1 ); \ - PASTEMAC(ch,scals)( *alpha, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, alpha_chi1 ); \ + bli_tscals( ch,ch,ch, *alpha, alpha_chi1 ); \ \ kfp_av \ ( \ diff --git a/frame/2/gemv/bli_gemv_unf_var2.c b/frame/2/gemv/bli_gemv_unf_var2.c index a890171161..f1be39ca57 100644 --- a/frame/2/gemv/bli_gemv_unf_var2.c +++ b/frame/2/gemv/bli_gemv_unf_var2.c @@ -70,7 +70,7 @@ void PASTEMAC(ch,varname) \ conja = bli_extract_conj( transa ); \ \ /* If beta is zero, use setv. Otherwise, scale by beta. 
*/ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ diff --git a/frame/2/ger/bli_ger_unb_var1.c b/frame/2/ger/bli_ger_unb_var1.c index 24e96822b9..0b07611302 100644 --- a/frame/2/ger/bli_ger_unb_var1.c +++ b/frame/2/ger/bli_ger_unb_var1.c @@ -68,8 +68,8 @@ void PASTEMAC(ch,varname) \ y1 = y + (0 )*incy; \ \ /* a1t = a1t + alpha * chi1 * y; */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, alpha_chi1 ); \ - PASTEMAC(ch,scals)( *alpha, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, alpha_chi1 ); \ + bli_tscals( ch,ch,ch, *alpha, alpha_chi1 ); \ \ kfp_av \ ( \ diff --git a/frame/2/ger/bli_ger_unb_var2.c b/frame/2/ger/bli_ger_unb_var2.c index fb38e683d8..ee40223ca4 100644 --- a/frame/2/ger/bli_ger_unb_var2.c +++ b/frame/2/ger/bli_ger_unb_var2.c @@ -68,8 +68,8 @@ void PASTEMAC(ch,varname) \ psi1 = y + (j )*incy; \ \ /* a1 = a1 + alpha * psi1 * x; */ \ - PASTEMAC(ch,copycjs)( conjy, *psi1, alpha_psi1 ); \ - PASTEMAC(ch,scals)( *alpha, alpha_psi1 ); \ + bli_tcopycjs( ch,ch, conjy, *psi1, alpha_psi1 ); \ + bli_tscals( ch,ch,ch, *alpha, alpha_psi1 ); \ \ kfp_av \ ( \ diff --git a/frame/2/hemv/bli_hemv_unb_var1.c b/frame/2/hemv/bli_hemv_unb_var1.c index eeffc42929..204c90f0da 100644 --- a/frame/2/hemv/bli_hemv_unb_var1.c +++ b/frame/2/hemv/bli_hemv_unb_var1.c @@ -91,7 +91,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -134,8 +134,8 @@ void PASTEMAC(ch,varname) \ psi1 = y + (i )*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. 
*/ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ - PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, conjx_chi1, alpha_chi1 ); \ \ /* y0 = y0 + alpha * a10t' * chi1; */ \ kfp_av \ @@ -164,12 +164,12 @@ void PASTEMAC(ch,varname) \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. */ \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( alpha11_temp ); \ + bli_tseti0s( ch, alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi1, alpha11_temp, *psi1 ); \ \ } \ } diff --git a/frame/2/hemv/bli_hemv_unb_var2.c b/frame/2/hemv/bli_hemv_unb_var2.c index 07de60dcc0..c11563c4a9 100644 --- a/frame/2/hemv/bli_hemv_unb_var2.c +++ b/frame/2/hemv/bli_hemv_unb_var2.c @@ -93,7 +93,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -136,8 +136,8 @@ void PASTEMAC(ch,varname) \ psi1 = y + (i )*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ - PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, conjx_chi1, alpha_chi1 ); \ \ /* psi1 = psi1 + alpha * a10t * x0; */ \ kfp_dv \ @@ -169,12 +169,12 @@ void PASTEMAC(ch,varname) \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. 
*/ \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( alpha11_temp ); \ + bli_tseti0s( ch, alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi1, alpha11_temp, *psi1 ); \ } \ } diff --git a/frame/2/hemv/bli_hemv_unb_var3.c b/frame/2/hemv/bli_hemv_unb_var3.c index 1edd78f824..6654acd5e8 100644 --- a/frame/2/hemv/bli_hemv_unb_var3.c +++ b/frame/2/hemv/bli_hemv_unb_var3.c @@ -91,7 +91,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -134,17 +134,17 @@ void PASTEMAC(ch,varname) \ y2 = y + (i+1)*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ - PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, conjx_chi1, alpha_chi1 ); \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. 
*/ \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( alpha11_temp ); \ + bli_tseti0s( ch, alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi1, alpha11_temp, *psi1 ); \ \ /* psi1 = psi1 + alpha * a21' * x2; */ \ kfp_dv \ diff --git a/frame/2/hemv/bli_hemv_unb_var4.c b/frame/2/hemv/bli_hemv_unb_var4.c index 704299ab1f..ea2bee6d3d 100644 --- a/frame/2/hemv/bli_hemv_unb_var4.c +++ b/frame/2/hemv/bli_hemv_unb_var4.c @@ -92,7 +92,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -135,8 +135,8 @@ void PASTEMAC(ch,varname) \ y2 = y + (i+1)*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ - PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, conjx_chi1, alpha_chi1 ); \ \ /* y0 = y0 + alpha * a10t' * chi1; */ \ kfp_av \ @@ -151,12 +151,12 @@ void PASTEMAC(ch,varname) \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. 
*/ \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( alpha11_temp ); \ + bli_tseti0s( ch, alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi1, alpha11_temp, *psi1 ); \ \ /* y2 = y2 + alpha * a21 * chi1; */ \ kfp_av \ diff --git a/frame/2/hemv/bli_hemv_unf_var1.c b/frame/2/hemv/bli_hemv_unf_var1.c index bb96d9ae59..45e7ef7b46 100644 --- a/frame/2/hemv/bli_hemv_unf_var1.c +++ b/frame/2/hemv/bli_hemv_unf_var1.c @@ -100,7 +100,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -176,38 +176,38 @@ void PASTEMAC(ch,varname) \ y21 = y1 + (k+1)*incy; \ \ /* y01 = y01 + alpha * a10t' * chi11; */ \ - PASTEMAC(ch,copycjs)( conjx, *chi11, conjx_chi11 ); \ - PASTEMAC(ch,scal2s)( *alpha, conjx_chi11, alpha_chi11 ); \ + bli_tcopycjs( ch,ch, conjx, *chi11, conjx_chi11 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, conjx_chi11, alpha_chi11 ); \ if ( bli_is_conj( conj1 ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,axpyjs)( alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ + bli_taxpyjs( ch,ch,ch,ch, alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,axpys)( alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ } \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. 
*/ \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( alpha11_temp ); \ + bli_tseti0s( ch, alpha11_temp ); \ \ /* psi11 = psi11 + alpha * alpha11 * chi11; */ \ - PASTEMAC(ch,axpys)( alpha_chi11, alpha11_temp, *psi11 ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi11, alpha11_temp, *psi11 ); \ \ /* y21 = y21 + alpha * a21 * chi11; */ \ if ( bli_is_conj( conj0 ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,axpyjs)( alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ + bli_taxpyjs( ch,ch,ch,ch, alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,axpys)( alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ } \ } \ } \ diff --git a/frame/2/hemv/bli_hemv_unf_var1a.c b/frame/2/hemv/bli_hemv_unf_var1a.c index f20a6de849..f4ee1988d9 100644 --- a/frame/2/hemv/bli_hemv_unf_var1a.c +++ b/frame/2/hemv/bli_hemv_unf_var1a.c @@ -91,7 +91,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -132,8 +132,8 @@ void PASTEMAC(ch,varname) \ psi1 = y + (i )*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. 
*/ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ - PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, conjx_chi1, alpha_chi1 ); \ \ /* psi1 = psi1 + alpha * a10t * x0; (dotv) */ \ /* y0 = y0 + alpha * a10t' * chi1; (axpyv) */ \ @@ -150,16 +150,16 @@ void PASTEMAC(ch,varname) \ y0, incy, \ cntx \ ); \ - PASTEMAC(ch,axpys)( *alpha, rho, *psi1 ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, rho, *psi1 ); \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. */ \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( alpha11_temp ); \ + bli_tseti0s( ch, alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi1, alpha11_temp, *psi1 ); \ \ } \ } diff --git a/frame/2/hemv/bli_hemv_unf_var3.c b/frame/2/hemv/bli_hemv_unf_var3.c index ef25a35627..f0d9104290 100644 --- a/frame/2/hemv/bli_hemv_unf_var3.c +++ b/frame/2/hemv/bli_hemv_unf_var3.c @@ -100,7 +100,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. 
*/ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -156,38 +156,38 @@ void PASTEMAC(ch,varname) \ y21 = y1 + (k+1)*incy; \ \ /* y01 = y01 + alpha * a10t' * chi11; */ \ - PASTEMAC(ch,copycjs)( conjx, *chi11, conjx_chi11 ); \ - PASTEMAC(ch,scal2s)( *alpha, conjx_chi11, alpha_chi11 ); \ + bli_tcopycjs( ch,ch, conjx, *chi11, conjx_chi11 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, conjx_chi11, alpha_chi11 ); \ if ( bli_is_conj( conj0 ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,axpyjs)( alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ + bli_taxpyjs( ch,ch,ch,ch, alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,axpys)( alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ } \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. */ \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( alpha11_temp ); \ + bli_tseti0s( ch, alpha11_temp ); \ \ /* psi11 = psi11 + alpha * alpha11 * chi11; */ \ - PASTEMAC(ch,axpys)( alpha_chi11, alpha11_temp, *psi11 ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi11, alpha11_temp, *psi11 ); \ \ /* y21 = y21 + alpha * a21 * chi11; */ \ if ( bli_is_conj( conj1 ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,axpyjs)( alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ + bli_taxpyjs( ch,ch,ch,ch, alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,axpys)( alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ } \ } \ \ diff --git a/frame/2/hemv/bli_hemv_unf_var3a.c b/frame/2/hemv/bli_hemv_unf_var3a.c index 3501a9ac74..8e1ffc3e2a 
100644 --- a/frame/2/hemv/bli_hemv_unf_var3a.c +++ b/frame/2/hemv/bli_hemv_unf_var3a.c @@ -91,7 +91,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ @@ -133,16 +133,16 @@ void PASTEMAC(ch,varname) \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. */ \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( alpha11_temp ); \ + bli_tseti0s( ch, alpha11_temp ); \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ - PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, conjx_chi1, alpha_chi1 ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi1, alpha11_temp, *psi1 ); \ \ /* psi1 = psi1 + alpha * a21' * x2; (dotv) */ \ /* y2 = y2 + alpha * a21 * chi1; (axpyv) */ \ @@ -159,7 +159,7 @@ void PASTEMAC(ch,varname) \ y2, incy, \ cntx \ ); \ - PASTEMAC(ch,axpys)( *alpha, rho, *psi1 ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, rho, *psi1 ); \ } \ } diff --git a/frame/2/her/bli_her_unb_var1.c b/frame/2/her/bli_her_unb_var1.c index 7f7215c5d9..8491b75fcf 100644 --- a/frame/2/her/bli_her_unb_var1.c +++ b/frame/2/her/bli_her_unb_var1.c @@ -70,10 +70,10 @@ void PASTEMAC(ch,varname) \ \ /* Make a local copy of alpha and zero out the imaginary component if we are being invoked as her, since her requires alpha to be real. 
*/ \ - PASTEMAC(ch,copys)( *alpha, alpha_local ); \ + bli_tcopys( ch,ch, *alpha, alpha_local ); \ if ( bli_is_conj( conjh ) ) \ { \ - PASTEMAC(ch,seti0s)( alpha_local ); \ + bli_tseti0s( ch, alpha_local ); \ } \ \ /* The algorithm will be expressed in terms of the lower triangular case; @@ -112,15 +112,15 @@ void PASTEMAC(ch,varname) \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ \ /* Apply conjx to chi1. */ \ - PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \ - PASTEMAC(ch,copycjs)( conj1, *chi1, conjx1_chi1 ); \ + bli_tcopycjs( ch,ch, conj0, *chi1, conjx0_chi1 ); \ + bli_tcopycjs( ch,ch, conj1, *chi1, conjx1_chi1 ); \ \ /* Compute scalar for vector subproblem. */ \ - PASTEMAC(ch,scal2s)( alpha_local, conjx0_chi1, alpha_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha_local, conjx0_chi1, alpha_chi1 ); \ \ /* Compute alpha * chi1 * conj(chi1) after chi1 has already been conjugated, if needed, by conjx. */ \ - PASTEMAC(ch,scal2s)( alpha_chi1, conjx1_chi1, alpha_chi1_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha_chi1, conjx1_chi1, alpha_chi1_chi1 ); \ \ /* c10t = c10t + alpha * chi1 * x0'; */ \ kfp_av \ @@ -134,12 +134,12 @@ void PASTEMAC(ch,varname) \ ); \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(chi1); */ \ - PASTEMAC(ch,adds)( alpha_chi1_chi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha_chi1_chi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( *gamma11 ); \ + bli_tseti0s( ch, *gamma11 ); \ } \ } diff --git a/frame/2/her/bli_her_unb_var2.c b/frame/2/her/bli_her_unb_var2.c index 1f071ca424..2693927d6b 100644 --- a/frame/2/her/bli_her_unb_var2.c +++ b/frame/2/her/bli_her_unb_var2.c @@ -70,10 +70,10 @@ void PASTEMAC(ch,varname) \ \ /* Make a local copy of alpha and zero out the imaginary component if we are being invoked as her, since her requires alpha to be real. 
*/ \ - PASTEMAC(ch,copys)( *alpha, alpha_local ); \ + bli_tcopys( ch,ch, *alpha, alpha_local ); \ if ( bli_is_conj( conjh ) ) \ { \ - PASTEMAC(ch,seti0s)( alpha_local ); \ + bli_tseti0s( ch, alpha_local ); \ } \ \ /* The algorithm will be expressed in terms of the lower triangular case; @@ -112,15 +112,15 @@ void PASTEMAC(ch,varname) \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx to chi1. */ \ - PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \ - PASTEMAC(ch,copycjs)( conj1, *chi1, conjx1_chi1 ); \ + bli_tcopycjs( ch,ch, conj0, *chi1, conjx0_chi1 ); \ + bli_tcopycjs( ch,ch, conj1, *chi1, conjx1_chi1 ); \ \ /* Compute scalar for vector subproblem. */ \ - PASTEMAC(ch,scal2s)( alpha_local, conjx0_chi1, alpha_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha_local, conjx0_chi1, alpha_chi1 ); \ \ /* Compute alpha * chi1 * conj(chi1) after chi1 has already been conjugated, if needed, by conjx. */ \ - PASTEMAC(ch,scal2s)( alpha_chi1, conjx1_chi1, alpha_chi1_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha_chi1, conjx1_chi1, alpha_chi1_chi1 ); \ \ /* c21 = c21 + alpha * x2 * conj(chi1); */ \ kfp_av \ @@ -134,12 +134,12 @@ void PASTEMAC(ch,varname) \ ); \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(chi1); */ \ - PASTEMAC(ch,adds)( alpha_chi1_chi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha_chi1_chi1, *gamma11 ); \ \ /* For her, explicitly set the imaginary component of gamma11 to zero. 
*/ \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( *gamma11 ); \ + bli_tseti0s( ch, *gamma11 ); \ } \ } diff --git a/frame/2/her2/bli_her2_unb_var1.c b/frame/2/her2/bli_her2_unb_var1.c index e0bfd77733..6a761c40a9 100644 --- a/frame/2/her2/bli_her2_unb_var1.c +++ b/frame/2/her2/bli_her2_unb_var1.c @@ -80,8 +80,8 @@ void PASTEMAC(ch,varname) \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ - PASTEMAC(ch,copys)( *alpha, alpha0 ); \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ + bli_tcopys( ch,ch, *alpha, alpha0 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ @@ -93,8 +93,8 @@ void PASTEMAC(ch,varname) \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ - PASTEMAC(ch,copys)( *alpha, alpha1 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha0 ); \ + bli_tcopys( ch,ch, *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian @@ -117,17 +117,17 @@ void PASTEMAC(ch,varname) \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx0_chi1 ); \ - PASTEMAC(ch,copycjs)( conjy, *psi1, conjy1_psi1 ); \ - PASTEMAC(ch,copycjs)( conj0, *psi1, conjy0_psi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx0_chi1 ); \ + bli_tcopycjs( ch,ch, conjy, *psi1, conjy1_psi1 ); \ + bli_tcopycjs( ch,ch, conj0, *psi1, conjy0_psi1 ); \ \ /* Compute scalars for vector subproblems. */ \ - PASTEMAC(ch,scal2s)( alpha0, conjx0_chi1, alpha0_chi1 ); \ - PASTEMAC(ch,scal2s)( alpha1, conjy1_psi1, alpha1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0, conjx0_chi1, alpha0_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha1, conjy1_psi1, alpha1_psi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. 
*/ \ - PASTEMAC(ch,scal2s)( alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ \ /* c10t = c10t + alpha * chi1 * y0'; */ \ kfp_av \ @@ -153,13 +153,13 @@ void PASTEMAC(ch,varname) \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( *gamma11 ); \ + bli_tseti0s( ch, *gamma11 ); \ } \ } diff --git a/frame/2/her2/bli_her2_unb_var2.c b/frame/2/her2/bli_her2_unb_var2.c index 0ab92fb384..9715d5c027 100644 --- a/frame/2/her2/bli_her2_unb_var2.c +++ b/frame/2/her2/bli_her2_unb_var2.c @@ -86,8 +86,8 @@ void PASTEMAC(ch,varname) \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ - PASTEMAC(ch,copys)( *alpha, alpha0 ); \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ + bli_tcopys( ch,ch, *alpha, alpha0 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ @@ -99,8 +99,8 @@ void PASTEMAC(ch,varname) \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ - PASTEMAC(ch,copys)( *alpha, alpha1 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha0 ); \ + bli_tcopys( ch,ch, *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian @@ -126,17 +126,17 @@ void PASTEMAC(ch,varname) \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. 
*/ \ - PASTEMAC(ch,copycjs)( conjh_conjy, *psi1, conjy0_psi1 ); \ - PASTEMAC(ch,copycjs)( conjy, *psi1, conjy1_psi1 ); \ - PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \ + bli_tcopycjs( ch,ch, conjh_conjy, *psi1, conjy0_psi1 ); \ + bli_tcopycjs( ch,ch, conjy, *psi1, conjy1_psi1 ); \ + bli_tcopycjs( ch,ch, conj0, *chi1, conjx0_chi1 ); \ \ /* Compute scalars for vector subproblems. */ \ - PASTEMAC(ch,scal2s)( alpha0, conjy0_psi1, alpha0_psi1 ); \ - PASTEMAC(ch,scal2s)( alpha1, conjy1_psi1, alpha1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0, conjy0_psi1, alpha0_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha1, conjy1_psi1, alpha1_psi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ - PASTEMAC(ch,scal2s)( alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ \ /* c21 = c21 + alpha * x2 * conj(psi1); */ \ kfp_av \ @@ -162,13 +162,13 @@ void PASTEMAC(ch,varname) \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. 
*/ \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( *gamma11 ); \ + bli_tseti0s( ch, *gamma11 ); \ } \ } diff --git a/frame/2/her2/bli_her2_unb_var3.c b/frame/2/her2/bli_her2_unb_var3.c index dc2630c46a..745a387f51 100644 --- a/frame/2/her2/bli_her2_unb_var3.c +++ b/frame/2/her2/bli_her2_unb_var3.c @@ -86,8 +86,8 @@ void PASTEMAC(ch,varname) \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ - PASTEMAC(ch,copys)( *alpha, alpha0 ); \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ + bli_tcopys( ch,ch, *alpha, alpha0 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ @@ -99,8 +99,8 @@ void PASTEMAC(ch,varname) \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ - PASTEMAC(ch,copys)( *alpha, alpha1 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha0 ); \ + bli_tcopys( ch,ch, *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian @@ -126,17 +126,17 @@ void PASTEMAC(ch,varname) \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx0_chi1 ); \ - PASTEMAC(ch,copycjs)( conjh_conjx, *chi1, conjx1_chi1 ); \ - PASTEMAC(ch,copycjs)( conj0, *psi1, conjy0_psi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx0_chi1 ); \ + bli_tcopycjs( ch,ch, conjh_conjx, *chi1, conjx1_chi1 ); \ + bli_tcopycjs( ch,ch, conj0, *psi1, conjy0_psi1 ); \ \ /* Compute scalars for vector subproblems. */ \ - PASTEMAC(ch,scal2s)( alpha0, conjx0_chi1, alpha0_chi1 ); \ - PASTEMAC(ch,scal2s)( alpha1, conjx1_chi1, alpha1_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0, conjx0_chi1, alpha0_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha1, conjx1_chi1, alpha1_chi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. 
*/ \ - PASTEMAC(ch,scal2s)( alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ \ /* c10t = c10t + alpha * chi1 * y0'; */ \ kfp_av \ @@ -162,13 +162,13 @@ void PASTEMAC(ch,varname) \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( *gamma11 ); \ + bli_tseti0s( ch, *gamma11 ); \ } \ } diff --git a/frame/2/her2/bli_her2_unb_var4.c b/frame/2/her2/bli_her2_unb_var4.c index 59902654d9..e93d6a4895 100644 --- a/frame/2/her2/bli_her2_unb_var4.c +++ b/frame/2/her2/bli_her2_unb_var4.c @@ -86,8 +86,8 @@ void PASTEMAC(ch,varname) \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ - PASTEMAC(ch,copys)( *alpha, alpha0 ); \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ + bli_tcopys( ch,ch, *alpha, alpha0 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ @@ -99,8 +99,8 @@ void PASTEMAC(ch,varname) \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ - PASTEMAC(ch,copys)( *alpha, alpha1 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha0 ); \ + bli_tcopys( ch,ch, *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian @@ -125,17 +125,17 @@ void PASTEMAC(ch,varname) \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. 
*/ \ - PASTEMAC(ch,copycjs)( conjh_conjy, *psi1, conjy0_psi1 ); \ - PASTEMAC(ch,copycjs)( conjh_conjx, *chi1, conjx1_chi1 ); \ - PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \ + bli_tcopycjs( ch,ch, conjh_conjy, *psi1, conjy0_psi1 ); \ + bli_tcopycjs( ch,ch, conjh_conjx, *chi1, conjx1_chi1 ); \ + bli_tcopycjs( ch,ch, conj0, *chi1, conjx0_chi1 ); \ \ /* Compute scalars for vector subproblems. */ \ - PASTEMAC(ch,scal2s)( alpha0, conjy0_psi1, alpha0_psi1 ); \ - PASTEMAC(ch,scal2s)( alpha1, conjx1_chi1, alpha1_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0, conjy0_psi1, alpha0_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha1, conjx1_chi1, alpha1_chi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ - PASTEMAC(ch,scal2s)( alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ \ /* c21 = c21 + alpha * x2 * conj(psi1); */ \ kfp_av \ @@ -161,13 +161,13 @@ void PASTEMAC(ch,varname) \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. 
*/ \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( *gamma11 ); \ + bli_tseti0s( ch, *gamma11 ); \ } \ } diff --git a/frame/2/her2/bli_her2_unf_var1.c b/frame/2/her2/bli_her2_unf_var1.c index aa0de6a2f2..085ff9003d 100644 --- a/frame/2/her2/bli_her2_unf_var1.c +++ b/frame/2/her2/bli_her2_unf_var1.c @@ -80,8 +80,8 @@ void PASTEMAC(ch,varname) \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ - PASTEMAC(ch,copys)( *alpha, alpha0 ); \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ + bli_tcopys( ch,ch, *alpha, alpha0 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ @@ -93,8 +93,8 @@ void PASTEMAC(ch,varname) \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ - PASTEMAC(ch,copys)( *alpha, alpha1 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha0 ); \ + bli_tcopys( ch,ch, *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian @@ -117,17 +117,17 @@ void PASTEMAC(ch,varname) \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, conjx0_chi1 ); \ - PASTEMAC(ch,copycjs)( conjy, *psi1, conjy1_psi1 ); \ - PASTEMAC(ch,copycjs)( conj0, *psi1, conjy0_psi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, conjx0_chi1 ); \ + bli_tcopycjs( ch,ch, conjy, *psi1, conjy1_psi1 ); \ + bli_tcopycjs( ch,ch, conj0, *psi1, conjy0_psi1 ); \ \ /* Compute scalars for vector subproblems. */ \ - PASTEMAC(ch,scal2s)( alpha0, conjx0_chi1, alpha0_chi1 ); \ - PASTEMAC(ch,scal2s)( alpha1, conjy1_psi1, alpha1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0, conjx0_chi1, alpha0_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha1, conjy1_psi1, alpha1_psi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. 
*/ \ - PASTEMAC(ch,scal2s)( alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ \ /* c10t = c10t + alpha * chi1 * y0'; */ \ /* c10t = c10t + conj(alpha) * psi1 * x0'; */ \ @@ -146,13 +146,13 @@ void PASTEMAC(ch,varname) \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( *gamma11 ); \ + bli_tseti0s( ch, *gamma11 ); \ } \ } diff --git a/frame/2/her2/bli_her2_unf_var4.c b/frame/2/her2/bli_her2_unf_var4.c index 4095e5e653..2430b7673b 100644 --- a/frame/2/her2/bli_her2_unf_var4.c +++ b/frame/2/her2/bli_her2_unf_var4.c @@ -86,8 +86,8 @@ void PASTEMAC(ch,varname) \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ - PASTEMAC(ch,copys)( *alpha, alpha0 ); \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ + bli_tcopys( ch,ch, *alpha, alpha0 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ @@ -99,8 +99,8 @@ void PASTEMAC(ch,varname) \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ - PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ - PASTEMAC(ch,copys)( *alpha, alpha1 ); \ + bli_tcopycjs( ch,ch, conjh, *alpha, alpha0 ); \ + bli_tcopys( ch,ch, *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian @@ -125,17 +125,17 @@ void PASTEMAC(ch,varname) \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. 
*/ \ - PASTEMAC(ch,copycjs)( conjh_conjy, *psi1, conjy0_psi1 ); \ - PASTEMAC(ch,copycjs)( conjh_conjx, *chi1, conjx1_chi1 ); \ - PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \ + bli_tcopycjs( ch,ch, conjh_conjy, *psi1, conjy0_psi1 ); \ + bli_tcopycjs( ch,ch, conjh_conjx, *chi1, conjx1_chi1 ); \ + bli_tcopycjs( ch,ch, conj0, *chi1, conjx0_chi1 ); \ \ /* Compute scalars for vector subproblems. */ \ - PASTEMAC(ch,scal2s)( alpha0, conjy0_psi1, alpha0_psi1 ); \ - PASTEMAC(ch,scal2s)( alpha1, conjx1_chi1, alpha1_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0, conjy0_psi1, alpha0_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha1, conjx1_chi1, alpha1_chi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ - PASTEMAC(ch,scal2s)( alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ + bli_tscal2s( ch,ch,ch,ch, alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ \ /* c21 = c21 + alpha * x2 * conj(psi1); */ \ /* c21 = c21 + conj(alpha) * y2 * conj(chi1); */ \ @@ -154,13 +154,13 @@ void PASTEMAC(ch,varname) \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ - PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ + bli_tadds( ch,ch,ch, alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. 
*/ \ if ( bli_is_conj( conjh ) ) \ - PASTEMAC(ch,seti0s)( *gamma11 ); \ + bli_tseti0s( ch, *gamma11 ); \ } \ } diff --git a/frame/2/trmv/bli_trmv_unb_var1.c b/frame/2/trmv/bli_trmv_unb_var1.c index 36ba911b82..fb80f72baf 100644 --- a/frame/2/trmv/bli_trmv_unb_var1.c +++ b/frame/2/trmv/bli_trmv_unb_var1.c @@ -96,10 +96,10 @@ void PASTEMAC(ch,varname) \ x2 = x + (i+1)*incx; \ \ /* chi1 = alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ + bli_tcopys( ch,ch, *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ - PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ - PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi1 ); \ + bli_tscalcjs( ch,ch,ch, conja, *alpha11, alpha_alpha11_conj ); \ + bli_tscals( ch,ch,ch, alpha_alpha11_conj, *chi1 ); \ \ /* chi1 = chi1 + alpha * a12t * x2; */ \ kfp_dv \ @@ -112,7 +112,7 @@ void PASTEMAC(ch,varname) \ &rho, \ cntx \ ); \ - PASTEMAC(ch,axpys)( *alpha, rho, *chi1 ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, rho, *chi1 ); \ } \ } \ else /* if ( bli_is_lower( uploa_trans ) ) */ \ @@ -127,10 +127,10 @@ void PASTEMAC(ch,varname) \ x0 = x + (0 )*incx; \ \ /* chi1 = alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ + bli_tcopys( ch,ch, *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ - PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ - PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi1 ); \ + bli_tscalcjs( ch,ch,ch, conja, *alpha11, alpha_alpha11_conj ); \ + bli_tscals( ch,ch,ch, alpha_alpha11_conj, *chi1 ); \ \ /* chi1 = chi1 + alpha * a10t * x0; */ \ kfp_dv \ @@ -143,7 +143,7 @@ void PASTEMAC(ch,varname) \ &rho, \ cntx \ ); \ - PASTEMAC(ch,axpys)( *alpha, rho, *chi1 ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, rho, *chi1 ); \ } \ } \ } diff --git a/frame/2/trmv/bli_trmv_unb_var2.c b/frame/2/trmv/bli_trmv_unb_var2.c index 91b85d685f..0b18da5e0d 100644 --- a/frame/2/trmv/bli_trmv_unb_var2.c +++ b/frame/2/trmv/bli_trmv_unb_var2.c @@ 
-96,7 +96,7 @@ void PASTEMAC(ch,varname) \ x0 = x + (0 )*incx; \ \ /* x0 = x0 + alpha * chi1 * a01; */ \ - PASTEMAC(ch,scal2s)( *alpha, *chi1, alpha_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, *chi1, alpha_chi1 ); \ kfp_av \ ( \ conja, \ @@ -108,10 +108,10 @@ void PASTEMAC(ch,varname) \ ); \ \ /* chi1 = alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ + bli_tcopys( ch,ch, *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ - PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ - PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi1 ); \ + bli_tscalcjs( ch,ch,ch, conja, *alpha11, alpha_alpha11_conj ); \ + bli_tscals( ch,ch,ch, alpha_alpha11_conj, *chi1 ); \ } \ } \ else /* if ( bli_is_lower( uploa_trans ) ) */ \ @@ -126,7 +126,7 @@ void PASTEMAC(ch,varname) \ x2 = x + (i+1)*incx; \ \ /* x2 = x2 + alpha * chi1 * a21; */ \ - PASTEMAC(ch,scal2s)( *alpha, *chi1, alpha_chi1 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, *chi1, alpha_chi1 ); \ kfp_av \ ( \ conja, \ @@ -138,10 +138,10 @@ void PASTEMAC(ch,varname) \ ); \ \ /* chi1 = alpha * alpha11 * chi1; */ \ - PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ + bli_tcopys( ch,ch, *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ - PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ - PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi1 ); \ + bli_tscalcjs( ch,ch,ch, conja, *alpha11, alpha_alpha11_conj ); \ + bli_tscals( ch,ch,ch, alpha_alpha11_conj, *chi1 ); \ } \ } \ } diff --git a/frame/2/trmv/bli_trmv_unf_var1.c b/frame/2/trmv/bli_trmv_unf_var1.c index 70b4fa7f1f..3983ad85a9 100644 --- a/frame/2/trmv/bli_trmv_unf_var1.c +++ b/frame/2/trmv/bli_trmv_unf_var1.c @@ -116,24 +116,24 @@ void PASTEMAC(ch,varname) \ x21 = x1 + (l+1)*incx; \ \ /* chi11 = alpha * alpha11 * chi11; */ \ - PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ + bli_tcopys( ch,ch, *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ - PASTEMAC(ch,scalcjs)( 
conja, *alpha11, alpha_alpha11_conj ); \ - PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi11 ); \ + bli_tscalcjs( ch,ch,ch, conja, *alpha11, alpha_alpha11_conj ); \ + bli_tscals( ch,ch,ch, alpha_alpha11_conj, *chi11 ); \ \ /* chi11 = chi11 + alpha * a12t * x21; */ \ - PASTEMAC(ch,set0s)( rho1 ); \ + bli_tset0s( ch, rho1 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,dotjs)( *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ + bli_tdotjs( ch,ch,ch,ch, *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,dots)( *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ + bli_tdots( ch,ch,ch,ch, *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ } \ - PASTEMAC(ch,axpys)( *alpha, rho1, *chi11 ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, rho1, *chi11 ); \ } \ \ /* x1 = x1 + alpha * A12 * x2; */ \ @@ -175,24 +175,24 @@ void PASTEMAC(ch,varname) \ x01 = x1 + (0 )*incx; \ \ /* chi11 = alpha * alpha11 * chi11; */ \ - PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ + bli_tcopys( ch,ch, *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ - PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ - PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi11 ); \ + bli_tscalcjs( ch,ch,ch, conja, *alpha11, alpha_alpha11_conj ); \ + bli_tscals( ch,ch,ch, alpha_alpha11_conj, *chi11 ); \ \ /* chi11 = chi11 + alpha * a10t * x01; */ \ - PASTEMAC(ch,set0s)( rho1 ); \ + bli_tset0s( ch, rho1 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,dotjs)( *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ + bli_tdotjs( ch,ch,ch,ch, *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,dots)( *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ + bli_tdots( ch,ch,ch,ch, *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ } \ - PASTEMAC(ch,axpys)( *alpha, rho1, *chi11 ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, rho1, *chi11 ); \ } \ \ /* x1 = x1 + 
alpha * A10 * x0; */ \ diff --git a/frame/2/trmv/bli_trmv_unf_var2.c b/frame/2/trmv/bli_trmv_unf_var2.c index 981a819de9..7cfad81a6c 100644 --- a/frame/2/trmv/bli_trmv_unf_var2.c +++ b/frame/2/trmv/bli_trmv_unf_var2.c @@ -129,23 +129,23 @@ void PASTEMAC(ch,varname) \ x01 = x1 + (0 )*incx; \ \ /* x01 = x01 + alpha * chi11 * a01; */ \ - PASTEMAC(ch,scal2s)( *alpha, *chi11, alpha_chi11 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, *chi11, alpha_chi11 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,axpyjs)( alpha_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ + bli_taxpyjs( ch,ch,ch,ch, alpha_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,axpys)( alpha_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ } \ \ /* chi11 = alpha * alpha11 * chi11; */ \ - PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ + bli_tcopys( ch,ch, *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ - PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ - PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi11 ); \ + bli_tscalcjs( ch,ch,ch, conja, *alpha11, alpha_alpha11_conj ); \ + bli_tscals( ch,ch,ch, alpha_alpha11_conj, *chi11 ); \ } \ } \ } \ @@ -186,23 +186,23 @@ void PASTEMAC(ch,varname) \ x21 = x1 + (l+1)*incx; \ \ /* x21 = x21 + alpha * chi11 * a21; */ \ - PASTEMAC(ch,scal2s)( *alpha, *chi11, alpha_chi11 ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, *chi11, alpha_chi11 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,axpyjs)( alpha_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ + bli_taxpyjs( ch,ch,ch,ch, alpha_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,axpys)( alpha_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ + bli_taxpys( ch,ch,ch,ch, alpha_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ } \ \ /* 
chi11 = alpha * alpha11 * chi11; */ \ - PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ + bli_tcopys( ch,ch, *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ - PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ - PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi11 ); \ + bli_tscalcjs( ch,ch,ch, conja, *alpha11, alpha_alpha11_conj ); \ + bli_tscals( ch,ch,ch, alpha_alpha11_conj, *chi11 ); \ } \ } \ } \ diff --git a/frame/2/trsv/bli_trsv_unb_var1.c b/frame/2/trsv/bli_trsv_unb_var1.c index 99ddce8617..6c80570548 100644 --- a/frame/2/trsv/bli_trsv_unb_var1.c +++ b/frame/2/trsv/bli_trsv_unb_var1.c @@ -117,13 +117,13 @@ void PASTEMAC(ch,varname) \ &rho, \ cntx \ ); \ - PASTEMAC(ch,subs)( rho, *chi1 ); \ + bli_tsubs( ch,ch,ch, rho, *chi1 ); \ \ /* chi1 = chi1 / alpha11; */ \ if ( bli_is_nonunit_diag( diaga ) ) \ { \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \ - PASTEMAC(ch,invscals)( alpha11_conj, *chi1 ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_conj ); \ + bli_tinvscals( ch,ch,ch, alpha11_conj, *chi1 ); \ } \ } \ } \ @@ -149,13 +149,13 @@ void PASTEMAC(ch,varname) \ &rho, \ cntx \ ); \ - PASTEMAC(ch,subs)( rho, *chi1 ); \ + bli_tsubs( ch,ch,ch, rho, *chi1 ); \ \ /* chi1 = chi1 / alpha11; */ \ if ( bli_is_nonunit_diag( diaga ) ) \ { \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \ - PASTEMAC(ch,invscals)( alpha11_conj, *chi1 ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_conj ); \ + bli_tinvscals( ch,ch,ch, alpha11_conj, *chi1 ); \ } \ } \ } \ diff --git a/frame/2/trsv/bli_trsv_unb_var2.c b/frame/2/trsv/bli_trsv_unb_var2.c index aed530c2dd..1d3dd7a164 100644 --- a/frame/2/trsv/bli_trsv_unb_var2.c +++ b/frame/2/trsv/bli_trsv_unb_var2.c @@ -109,12 +109,12 @@ void PASTEMAC(ch,varname) \ /* chi1 = chi1 / alpha11; */ \ if ( bli_is_nonunit_diag( diaga ) ) \ { \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \ - PASTEMAC(ch,invscals)( alpha11_conj, *chi1 ); \ + bli_tcopycjs( ch,ch, conja, 
*alpha11, alpha11_conj ); \ + bli_tinvscals( ch,ch,ch, alpha11_conj, *chi1 ); \ } \ \ /* x0 = x0 - chi1 * a01; */ \ - PASTEMAC(ch,neg2s)( *chi1, minus_chi1 ); \ + bli_tneg2s( ch,ch, *chi1, minus_chi1 ); \ kfp_av \ ( \ conja, \ @@ -140,12 +140,12 @@ void PASTEMAC(ch,varname) \ /* chi1 = chi1 / alpha11; */ \ if ( bli_is_nonunit_diag( diaga ) ) \ { \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \ - PASTEMAC(ch,invscals)( alpha11_conj, *chi1 ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_conj ); \ + bli_tinvscals( ch,ch,ch, alpha11_conj, *chi1 ); \ } \ \ /* x2 = x2 - chi1 * a21; */ \ - PASTEMAC(ch,neg2s)( *chi1, minus_chi1 ); \ + bli_tneg2s( ch,ch, *chi1, minus_chi1 ); \ kfp_av \ ( \ conja, \ diff --git a/frame/2/trsv/bli_trsv_unf_var1.c b/frame/2/trsv/bli_trsv_unf_var1.c index 109184a7c2..bf30903a15 100644 --- a/frame/2/trsv/bli_trsv_unf_var1.c +++ b/frame/2/trsv/bli_trsv_unf_var1.c @@ -143,24 +143,24 @@ void PASTEMAC(ch,varname) \ x21 = x1 + (l+1)*incx; \ \ /* chi11 = chi11 - a12t * x21; */ \ - PASTEMAC(ch,set0s)( rho1 ); \ + bli_tset0s( ch, rho1 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,dotjs)( *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ + bli_tdotjs( ch,ch,ch,ch, *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,dots)( *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ + bli_tdots( ch,ch,ch,ch, *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ } \ - PASTEMAC(ch,subs)( rho1, *chi11 ); \ + bli_tsubs( ch,ch,ch, rho1, *chi11 ); \ \ /* chi11 = chi11 / alpha11; */ \ if ( bli_is_nonunit_diag( diaga ) ) \ { \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \ - PASTEMAC(ch,invscals)( alpha11_conj, *chi11 ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_conj ); \ + bli_tinvscals( ch,ch,ch, alpha11_conj, *chi11 ); \ } \ } \ } \ @@ -203,24 +203,24 @@ void PASTEMAC(ch,varname) \ x01 = x1 + (0 )*incx; \ \ /* chi11 = chi11 - a10t * x01; */ \ - 
PASTEMAC(ch,set0s)( rho1 ); \ + bli_tset0s( ch, rho1 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,dotjs)( *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ + bli_tdotjs( ch,ch,ch,ch, *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ - PASTEMAC(ch,dots)( *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ + bli_tdots( ch,ch,ch,ch, *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ } \ - PASTEMAC(ch,subs)( rho1, *chi11 ); \ + bli_tsubs( ch,ch,ch, rho1, *chi11 ); \ \ /* chi11 = chi11 / alpha11; */ \ if ( bli_is_nonunit_diag( diaga ) ) \ { \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \ - PASTEMAC(ch,invscals)( alpha11_conj, *chi11 ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_conj ); \ + bli_tinvscals( ch,ch,ch, alpha11_conj, *chi11 ); \ } \ } \ } \ diff --git a/frame/2/trsv/bli_trsv_unf_var2.c b/frame/2/trsv/bli_trsv_unf_var2.c index 5055b9a62f..79f459601b 100644 --- a/frame/2/trsv/bli_trsv_unf_var2.c +++ b/frame/2/trsv/bli_trsv_unf_var2.c @@ -129,21 +129,21 @@ void PASTEMAC(ch,varname) \ /* chi11 = chi11 / alpha11; */ \ if ( bli_is_nonunit_diag( diaga ) ) \ { \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \ - PASTEMAC(ch,invscals)( alpha11_conj, *chi11 ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_conj ); \ + bli_tinvscals( ch,ch,ch, alpha11_conj, *chi11 ); \ } \ \ /* x01 = x01 - chi11 * a01; */ \ - PASTEMAC(ch,neg2s)( *chi11, minus_chi11 ); \ + bli_tneg2s( ch,ch, *chi11, minus_chi11 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,axpyjs)( minus_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ + bli_taxpyjs( ch,ch,ch,ch, minus_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,axpys)( minus_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ + bli_taxpys( ch,ch,ch,ch, minus_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ } \ } \ \ @@ -187,21 +187,21 @@ void 
PASTEMAC(ch,varname) \ /* chi11 = chi11 / alpha11; */ \ if ( bli_is_nonunit_diag( diaga ) ) \ { \ - PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \ - PASTEMAC(ch,invscals)( alpha11_conj, *chi11 ); \ + bli_tcopycjs( ch,ch, conja, *alpha11, alpha11_conj ); \ + bli_tinvscals( ch,ch,ch, alpha11_conj, *chi11 ); \ } \ \ /* x21 = x21 - chi11 * a21; */ \ - PASTEMAC(ch,neg2s)( *chi11, minus_chi11 ); \ + bli_tneg2s( ch,ch, *chi11, minus_chi11 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,axpyjs)( minus_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ + bli_taxpyjs( ch,ch,ch,ch, minus_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ - PASTEMAC(ch,axpys)( minus_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ + bli_taxpys( ch,ch,ch,ch, minus_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ } \ } \ \ diff --git a/frame/3/bli_l3_sup_var12.c b/frame/3/bli_l3_sup_var12.c index 6941424162..a0a7c5e424 100644 --- a/frame/3/bli_l3_sup_var12.c +++ b/frame/3/bli_l3_sup_var12.c @@ -214,7 +214,7 @@ void PASTEMAC(ch,varname) \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* If alpha is zero, scale by beta and return. */ \ - if ( PASTEMAC(ch,eq0)( *(( ctype* )alpha) ) ) \ + if ( bli_teq0s( ch, *(( ctype* )alpha) ) ) \ { \ PASTEMAC(ch,scalm) \ ( \ @@ -561,7 +561,7 @@ void PASTEMAC(ch,varname) \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* If alpha is zero, scale by beta and return. 
*/ \ - if ( PASTEMAC(ch,eq0)( *(( ctype* )alpha) ) ) \ + if ( bli_teq0s( ch, *(( ctype* )alpha) ) ) \ { \ PASTEMAC(ch,scalm) \ ( \ diff --git a/frame/3/gemmt/bli_gemmt_l_ker_var2.c b/frame/3/gemmt/bli_gemmt_l_ker_var2.c index 02ff808e2e..f86806eb06 100644 --- a/frame/3/gemmt/bli_gemmt_l_ker_var2.c +++ b/frame/3/gemmt/bli_gemmt_l_ker_var2.c @@ -62,14 +62,16 @@ BLIS_INLINE void PASTEMAC(ch,op) \ const ctype* restrict b_cast = b; \ ctype* restrict y_cast = y; \ \ - PASTEMAC(ch,ch,ch,xpbys_mxn_l) \ + bli_txpbys_mxn_uplo \ ( \ + ch,ch,ch,ch, \ diagoff, \ + BLIS_LOWER, \ m, \ n, \ x_cast, rs_x, cs_x, \ b_cast, \ - y_cast, rs_y, cs_y \ + y_cast, rs_y, cs_y \ ); \ } diff --git a/frame/3/gemmt/bli_gemmt_l_ker_var2b.c b/frame/3/gemmt/bli_gemmt_l_ker_var2b.c index a11f599d31..eaef84d537 100644 --- a/frame/3/gemmt/bli_gemmt_l_ker_var2b.c +++ b/frame/3/gemmt/bli_gemmt_l_ker_var2b.c @@ -62,14 +62,16 @@ BLIS_INLINE void PASTEMAC(ch,op) \ const ctype* restrict b_cast = b; \ ctype* restrict y_cast = y; \ \ - PASTEMAC(ch,ch,ch,xpbys_mxn_l) \ + bli_txpbys_mxn_uplo \ ( \ + ch,ch,ch,ch, \ diagoff, \ + BLIS_LOWER, \ m, \ n, \ x_cast, rs_x, cs_x, \ b_cast, \ - y_cast, rs_y, cs_y \ + y_cast, rs_y, cs_y \ ); \ } diff --git a/frame/3/gemmt/bli_gemmt_u_ker_var2.c b/frame/3/gemmt/bli_gemmt_u_ker_var2.c index 34a10914f9..082c388ea6 100644 --- a/frame/3/gemmt/bli_gemmt_u_ker_var2.c +++ b/frame/3/gemmt/bli_gemmt_u_ker_var2.c @@ -62,14 +62,16 @@ BLIS_INLINE void PASTEMAC(ch,op) \ const ctype* restrict b_cast = b; \ ctype* restrict y_cast = y; \ \ - PASTEMAC(ch,ch,ch,xpbys_mxn_u) \ + bli_txpbys_mxn_uplo \ ( \ + ch,ch,ch,ch, \ diagoff, \ + BLIS_UPPER, \ m, \ n, \ x_cast, rs_x, cs_x, \ b_cast, \ - y_cast, rs_y, cs_y \ + y_cast, rs_y, cs_y \ ); \ } diff --git a/frame/3/gemmt/bli_gemmt_u_ker_var2b.c b/frame/3/gemmt/bli_gemmt_u_ker_var2b.c index aa9f3bc5ef..b4ed52e410 100644 --- a/frame/3/gemmt/bli_gemmt_u_ker_var2b.c +++ b/frame/3/gemmt/bli_gemmt_u_ker_var2b.c @@ -62,14 +62,16 @@ BLIS_INLINE void 
PASTEMAC(ch,op) \ const ctype* restrict b_cast = b; \ ctype* restrict y_cast = y; \ \ - PASTEMAC(ch,ch,ch,xpbys_mxn_u) \ + bli_txpbys_mxn_uplo \ ( \ + ch,ch,ch,ch, \ diagoff, \ + BLIS_UPPER, \ m, \ n, \ x_cast, rs_x, cs_x, \ b_cast, \ - y_cast, rs_y, cs_y \ + y_cast, rs_y, cs_y \ ); \ } diff --git a/frame/base/bli_machval.c b/frame/base/bli_machval.c index a4b9223bcf..7c42284d68 100644 --- a/frame/base/bli_machval.c +++ b/frame/base/bli_machval.c @@ -113,7 +113,7 @@ void PASTEMAC(chv,opname) \ \ /* Copy the requested parameter value to the output buffer, which may involve a demotion from the complex to real domain. */ \ - PASTEMAC(chvr,chv,copys)( pvals[ val_i ], *v_cast ); \ + bli_tcopys( chvr,chv, pvals[ val_i ], *v_cast ); \ } INSERT_GENTFUNCR_BASIC( machval, lamch ) diff --git a/frame/base/bli_obj.c b/frame/base/bli_obj.c index e4e79d9f44..0c22f1a131 100644 --- a/frame/base/bli_obj.c +++ b/frame/base/bli_obj.c @@ -123,10 +123,10 @@ void bli_obj_create_without_buffer // for A and B are merged). 
//if ( bli_is_float( dt ) ) { bli_sset1s( *(( float* )s) ); } //else if ( bli_is_double( dt ) ) { bli_dset1s( *(( double* )s) ); } - if ( bli_is_float( dt ) ) { bli_cset1s( *(( scomplex* )s) ); } - else if ( bli_is_double( dt ) ) { bli_zset1s( *(( dcomplex* )s) ); } - else if ( bli_is_scomplex( dt ) ) { bli_cset1s( *(( scomplex* )s) ); } - else if ( bli_is_dcomplex( dt ) ) { bli_zset1s( *(( dcomplex* )s) ); } + if ( bli_is_float( dt ) ) { bli_tset1s( c, *(( scomplex* )s) ); } + else if ( bli_is_double( dt ) ) { bli_tset1s( z, *(( dcomplex* )s) ); } + else if ( bli_is_scomplex( dt ) ) { bli_tset1s( c, *(( scomplex* )s) ); } + else if ( bli_is_dcomplex( dt ) ) { bli_tset1s( z, *(( dcomplex* )s) ); } } void bli_obj_alloc_buffer diff --git a/frame/base/bli_query.c b/frame/base/bli_query.c index 140fc2f978..7824e1e482 100644 --- a/frame/base/bli_query.c +++ b/frame/base/bli_query.c @@ -77,7 +77,7 @@ bool bli_obj_equals( const obj_t* a, const obj_t* b ) else if ( dt == BLIS_DOUBLE ) r_val = bli_deqa( buf_a, buf_b ); else if ( dt == BLIS_SCOMPLEX ) r_val = bli_ceqa( buf_a, buf_b ); else if ( dt == BLIS_DCOMPLEX ) r_val = bli_zeqa( buf_a, buf_b ); - else if ( dt == BLIS_INT ) r_val = bli_ieqa( buf_a, buf_b ); + else if ( dt == BLIS_INT ) r_val = bli_ieq( buf_a, buf_b ); } return r_val; @@ -181,7 +181,7 @@ bool bli_obj_imag_is_zero( const obj_t* a ) bli_getsc( a, &a_r, &a_i ); // Compare the imaginary part of a to double-precision zero. 
- if ( !bli_deq0( a_i ) ) r_val = FALSE; + if ( !bli_teq0s( d, a_i ) ) r_val = FALSE; } return r_val; diff --git a/frame/base/bli_setgetijm.c b/frame/base/bli_setgetijm.c index 5a89d258e8..9c826af062 100644 --- a/frame/base/bli_setgetijm.c +++ b/frame/base/bli_setgetijm.c @@ -101,7 +101,7 @@ void PASTEMAC(ch,opname) \ \ ctype* b_ij = b_cast + (i )*rs + (j )*cs; \ \ - PASTEMAC(z,ch,sets)( ar, ai, *b_ij ); \ + bli_tsets( z,ch, ar, ai, *b_ij ); \ } INSERT_GENTFUNC_BASIC( setijm ) @@ -175,7 +175,7 @@ void PASTEMAC(ch,opname) \ \ const ctype* b_ij = b_cast + (i )*rs + (j )*cs; \ \ - PASTEMAC(ch,z,gets)( *b_ij, *ar, *ai ); \ + bli_tgets( ch,z, *b_ij, *ar, *ai ); \ } INSERT_GENTFUNC_BASIC( getijm ) diff --git a/frame/base/bli_setgetijv.c b/frame/base/bli_setgetijv.c index a6ae2860d9..dc50faae47 100644 --- a/frame/base/bli_setgetijv.c +++ b/frame/base/bli_setgetijv.c @@ -94,7 +94,7 @@ void PASTEMAC(ch,opname) \ \ ctype* restrict x_i = x_cast + (i )*incx; \ \ - PASTEMAC(z,ch,sets)( ar, ai, *x_i ); \ + bli_tsets( z,ch, ar, ai, *x_i ); \ } INSERT_GENTFUNC_BASIC( setijv ) @@ -161,7 +161,7 @@ void PASTEMAC(ch,opname) \ \ const ctype* restrict x_i = x_cast + (i )*incx; \ \ - PASTEMAC(ch,z,gets)( *x_i, *ar, *ai ); \ + bli_tgets( ch,z, *x_i, *ar, *ai ); \ } INSERT_GENTFUNC_BASIC( getijv ) diff --git a/frame/base/cast/bli_castm.c b/frame/base/cast/bli_castm.c index 6ae848b4c0..12dcbb0732 100644 --- a/frame/base/cast/bli_castm.c +++ b/frame/base/cast/bli_castm.c @@ -150,7 +150,7 @@ void PASTEMAC(cha,chb,opname) \ \ for ( i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(cha,chb,copyjs)( a1[i], b1[i] ); \ + bli_tcopyjs( cha,chb, a1[i], b1[i] ); \ } \ } \ } \ @@ -163,7 +163,7 @@ void PASTEMAC(cha,chb,opname) \ \ for ( i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(cha,chb,copyjs)( *a1, *b1 ); \ + bli_tcopyjs( cha,chb, *a1, *b1 ); \ \ a1 += inca; \ b1 += incb; \ @@ -182,7 +182,7 @@ void PASTEMAC(cha,chb,opname) \ \ for ( i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(cha,chb,copys)( a1[i], b1[i] ); \ + 
bli_tcopys( cha,chb, a1[i], b1[i] ); \ } \ } \ } \ @@ -195,7 +195,7 @@ void PASTEMAC(cha,chb,opname) \ \ for ( i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(cha,chb,copys)( *a1, *b1 ); \ + bli_tcopys( cha,chb, *a1, *b1 ); \ \ a1 += inca; \ b1 += incb; \ diff --git a/frame/base/cast/bli_castnzm.c b/frame/base/cast/bli_castnzm.c index 3c2bbcb574..18f2098382 100644 --- a/frame/base/cast/bli_castnzm.c +++ b/frame/base/cast/bli_castnzm.c @@ -150,7 +150,7 @@ void PASTEMAC(cha,chb,opname) \ \ for ( i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(cha,chb,copyjnzs)( a1[i], b1[i] ); \ + bli_tcopyjnzs( cha,chb, a1[i], b1[i] ); \ } \ } \ } \ @@ -163,7 +163,7 @@ void PASTEMAC(cha,chb,opname) \ \ for ( i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(cha,chb,copyjnzs)( *a1, *b1 ); \ + bli_tcopyjnzs( cha,chb, *a1, *b1 ); \ \ a1 += inca; \ b1 += incb; \ @@ -182,7 +182,7 @@ void PASTEMAC(cha,chb,opname) \ \ for ( i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(cha,chb,copynzs)( a1[i], b1[i] ); \ + bli_tcopynzs( cha,chb, a1[i], b1[i] ); \ } \ } \ } \ @@ -195,7 +195,7 @@ void PASTEMAC(cha,chb,opname) \ \ for ( i = 0; i < n_elem; ++i ) \ { \ - PASTEMAC(cha,chb,copynzs)( *a1, *b1 ); \ + bli_tcopynzs( cha,chb, *a1, *b1 ); \ \ a1 += inca; \ b1 += incb; \ diff --git a/frame/base/cast/bli_castv.c b/frame/base/cast/bli_castv.c index 468ff9109b..c110d4da1c 100644 --- a/frame/base/cast/bli_castv.c +++ b/frame/base/cast/bli_castv.c @@ -123,14 +123,14 @@ void PASTEMAC(chx,chy,opname) \ { \ for ( i = 0; i < n; ++i ) \ { \ - PASTEMAC(chx,chy,copyjs)( x1[i], y1[i] ); \ + bli_tcopyjs( chx,chy, x1[i], y1[i] ); \ } \ } \ else \ { \ for ( i = 0; i < n; ++i ) \ { \ - PASTEMAC(chx,chy,copyjs)( *x1, *y1 ); \ + bli_tcopyjs( chx,chy, *x1, *y1 ); \ \ x1 += incx; \ y1 += incy; \ @@ -143,14 +143,14 @@ void PASTEMAC(chx,chy,opname) \ { \ for ( i = 0; i < n; ++i ) \ { \ - PASTEMAC(chx,chy,copys)( x1[i], y1[i] ); \ + bli_tcopys( chx,chy, x1[i], y1[i] ); \ } \ } \ else \ { \ for ( i = 0; i < n; ++i ) \ { \ - PASTEMAC(chx,chy,copys)( 
*x1, *y1 ); \ + bli_tcopys( chx,chy, *x1, *y1 ); \ \ x1 += incx; \ y1 += incy; \ diff --git a/frame/compat/amd/bla_gemv_amd.c b/frame/compat/amd/bla_gemv_amd.c index 5cd523f178..7dbb8e4361 100644 --- a/frame/compat/amd/bla_gemv_amd.c +++ b/frame/compat/amd/bla_gemv_amd.c @@ -116,7 +116,7 @@ void PASTEF77(ch,blasname) \ bli_convert_blas_incv( m_y, (ftype*)y, *incy, y0, incy0 ); \ \ /* If alpha is zero, scale y by beta and return early. */ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_teq0s( ch, *alpha ) ) \ { \ PASTEMAC(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ diff --git a/frame/compat/bla_dot.c b/frame/compat/bla_dot.c index b68af083ce..e462a82912 100644 --- a/frame/compat/bla_dot.c +++ b/frame/compat/bla_dot.c @@ -207,8 +207,8 @@ double PASTEF77(d,sdot) float* chi1 = x0 + (i )*incx0; float* psi1 = y0 + (i )*incy0; - bli_ddots( (( double )(*chi1)), - (( double )(*psi1)), rho ); + bli_tdots( d,d,d,d, (( double )(*chi1)), + (( double )(*psi1)), rho ); } /* Finalization of BLIS is not required, because initialization was diff --git a/frame/compat/bla_her2k.c b/frame/compat/bla_her2k.c index 25e9fb4317..f6f58d401f 100644 --- a/frame/compat/bla_her2k.c +++ b/frame/compat/bla_her2k.c @@ -93,8 +93,8 @@ void PASTEF77(ch,blasname) \ - the rank-2k product is empty (either because alpha is zero or k is zero) AND matrix C is not scaled. */ \ if ( m0 == 0 || \ - ( ( PASTEMAC(ch,eq0)( *alpha ) || k0 == 0 ) \ - && PASTEMAC(chr,eq1)( *beta ) \ + ( ( bli_teq0s( ch, *alpha ) || k0 == 0 ) \ + && bli_teq1s( chr, *beta ) \ ) \ ) \ { \ @@ -186,8 +186,8 @@ void PASTEF77(ch,blasname) \ - the rank-2k product is empty (either because alpha is zero or k is zero) AND matrix C is not scaled. 
*/ \ if ( m0 == 0 || \ - ( ( PASTEMAC(ch,eq0)( *alpha ) || k0 == 0 ) \ - && PASTEMAC(chr,eq1)( *beta ) \ + ( ( bli_teq0s( ch, *alpha ) || k0 == 0 ) \ + && bli_teq1s( chr, *beta ) \ ) \ ) \ { \ diff --git a/frame/compat/bla_herk.c b/frame/compat/bla_herk.c index a9f01268d9..4f447fabaa 100644 --- a/frame/compat/bla_herk.c +++ b/frame/compat/bla_herk.c @@ -91,8 +91,8 @@ void PASTEF77(ch,blasname) \ - the rank-k product is empty (either because alpha is zero or k is zero) AND matrix C is not scaled. */ \ if ( m0 == 0 || \ - ( ( PASTEMAC(chr,eq0)( *alpha ) || k0 == 0 ) \ - && PASTEMAC(chr,eq1)( *beta ) \ + ( ( bli_teq0s( chr, *alpha ) || k0 == 0 ) \ + && bli_teq1s( chr, *beta ) \ ) \ ) \ { \ @@ -178,8 +178,8 @@ void PASTEF77(ch,blasname) \ - the rank-k product is empty (either because alpha is zero or k is zero) AND matrix C is not scaled. */ \ if ( m0 == 0 || \ - ( ( PASTEMAC(chr,eq0)( *alpha ) || k0 == 0 ) \ - && PASTEMAC(chr,eq1)( *beta ) \ + ( ( bli_teq0s( chr, *alpha ) || k0 == 0 ) \ + && bli_teq1s( chr, *beta ) \ ) \ ) \ { \ diff --git a/frame/compat/bla_scal.c b/frame/compat/bla_scal.c index 0acf7c10dd..543515a3fc 100644 --- a/frame/compat/bla_scal.c +++ b/frame/compat/bla_scal.c @@ -39,7 +39,7 @@ // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCSCAL -#define GENTFUNCSCAL( ftype_x, ftype_a, chx, cha, blasname, blisname ) \ +#define GENTFUNCSCAL( ftype_x, ftype_a, chx, cha, cha_real, blasname, blisname ) \ \ void PASTEF77(chx,cha,blasname) \ ( \ @@ -67,7 +67,7 @@ void PASTEF77(chx,cha,blasname) \ that is, we just always sub-optimally implement those cases by casting alpha to ctype_x (potentially the complex domain) and using the homogeneous datatype instance according to that type. */ \ - PASTEMAC(cha,chx,copys)( *alpha, alpha_cast ); \ + bli_tcopys( cha_real,chx, *alpha, alpha_cast ); \ \ /* Call BLIS interface. 
*/ \ PASTEMAC(chx,blisname,BLIS_TAPI_EX_SUF) \ diff --git a/frame/compat/f2c/bla_gbmv.c b/frame/compat/f2c/bla_gbmv.c index 320b496844..0edc7cbe9e 100644 --- a/frame/compat/f2c/bla_gbmv.c +++ b/frame/compat/f2c/bla_gbmv.c @@ -268,7 +268,7 @@ i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; - bli_csets( (0.f), (0.f), y[i__2] ); + bli_tsets( c,c, (0.f), (0.f), y[i__2] ); /* L10: */ } } else { @@ -276,8 +276,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; - bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); /* L20: */ } } @@ -287,7 +287,7 @@ i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; - bli_csets( (0.f), (0.f), y[i__2] ); + bli_tsets( c,c, (0.f), (0.f), y[i__2] ); iy += *incy; /* L30: */ } @@ -296,8 +296,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; - bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); iy += *incy; /* L40: */ } @@ -319,8 +319,8 @@ i__2 = jx; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { i__2 = jx; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), 
(bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; @@ -331,9 +331,9 @@ i__2 = i__; i__3 = i__; i__5 = k + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); /* L50: */ } } @@ -346,8 +346,8 @@ i__4 = jx; if (bli_creal(x[i__4]) != 0.f || bli_cimag(x[i__4]) != 0.f) { i__4 = jx; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__4]) - bli_cimag(*alpha) * bli_cimag(x[i__4])), (bli_creal(*alpha) * bli_cimag(x[i__4]) + bli_cimag(*alpha) * bli_creal(x[i__4])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__4]) - bli_cimag(*alpha) * bli_cimag(x[i__4])), (bli_creal(*alpha) * bli_cimag(x[i__4]) + bli_cimag(*alpha) * bli_creal(x[i__4])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); iy = ky; k = kup1 - j; /* Computing MAX */ @@ -359,9 +359,9 @@ i__4 = iy; i__2 = iy; i__5 = k + i__ + j * a_dim1; - bli_csets( 
(bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); iy += *incy; /* L70: */ } @@ -381,7 +381,7 @@ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - bli_csets( (0.f), (0.f), temp ); + bli_tsets( c,c, (0.f), (0.f), temp ); k = kup1 - j; if (noconj) { /* Computing MAX */ @@ -392,9 +392,9 @@ for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { i__3 = k + i__ + j * a_dim1; i__4 = i__; - bli_csets( (bli_creal(a[i__3]) * bli_creal(x[i__4]) - bli_cimag(a[i__3]) * bli_cimag(x[i__4])), (bli_creal(a[i__3]) * bli_cimag(x[i__4]) + bli_cimag(a[i__3]) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__3]) * bli_creal(x[i__4]) - bli_cimag(a[i__3]) * bli_cimag(x[i__4])), (bli_creal(a[i__3]) * bli_cimag(x[i__4]) + bli_cimag(a[i__3]) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L90: */ } } else { @@ -406,24 +406,24 @@ for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { bla_r_cnjg(&q__3, &a[k + i__ + j * a_dim1]); i__2 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * 
bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L100: */ } } i__4 = jy; i__2 = jy; - bli_csets( (bli_creal(*alpha) * bli_creal(temp) - bli_cimag(*alpha) * bli_cimag(temp)), (bli_creal(*alpha) * bli_cimag(temp) + bli_cimag(*alpha) * bli_creal(temp)), q__2 ); - bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp) - bli_cimag(*alpha) * bli_cimag(temp)), (bli_creal(*alpha) * bli_cimag(temp) + bli_cimag(*alpha) * bli_creal(temp)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); jy += *incy; /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - bli_csets( (0.f), (0.f), temp ); + bli_tsets( c,c, (0.f), (0.f), temp ); ix = kx; k = kup1 - j; if (noconj) { @@ -435,9 +435,9 @@ for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = k + i__ + j * a_dim1; i__2 = ix; - bli_csets( (bli_creal(a[i__4]) * bli_creal(x[i__2]) - bli_cimag(a[i__4]) * bli_cimag(x[i__2])), (bli_creal(a[i__4]) * bli_cimag(x[i__2]) + bli_cimag(a[i__4]) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__4]) * 
bli_creal(x[i__2]) - bli_cimag(a[i__4]) * bli_cimag(x[i__2])), (bli_creal(a[i__4]) * bli_cimag(x[i__2]) + bli_cimag(a[i__4]) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L120: */ } @@ -450,18 +450,18 @@ for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { bla_r_cnjg(&q__3, &a[k + i__ + j * a_dim1]); i__3 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L130: */ } } i__2 = jy; i__3 = jy; - bli_csets( (bli_creal(*alpha) * bli_creal(temp) - bli_cimag(*alpha) * bli_cimag(temp)), (bli_creal(*alpha) * bli_cimag(temp) + bli_cimag(*alpha) * bli_creal(temp)), q__2 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp) - bli_cimag(*alpha) * bli_cimag(temp)), (bli_creal(*alpha) * bli_cimag(temp) + bli_cimag(*alpha) * bli_creal(temp)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); jy += *incy; if (j > *ku) { kx += *incx; @@ -1421,7 +1421,7 @@ i__1 = leny; for (i__ = 1; i__ <= 
i__1; ++i__) { i__2 = i__; - bli_zsets( (0.), (0.), y[i__2] ); + bli_tsets( z,z, (0.), (0.), y[i__2] ); /* L10: */ } } else { @@ -1429,8 +1429,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; - bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); /* L20: */ } } @@ -1440,7 +1440,7 @@ i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; - bli_zsets( (0.), (0.), y[i__2] ); + bli_tsets( z,z, (0.), (0.), y[i__2] ); iy += *incy; /* L30: */ } @@ -1449,8 +1449,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; - bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); iy += *incy; /* L40: */ } @@ -1472,8 +1472,8 @@ i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ i__2 = jx; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; @@ -1484,9 +1484,9 @@ i__2 = i__; i__3 = i__; i__5 = k + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); /* L50: */ } } @@ -1499,8 +1499,8 @@ i__4 = jx; if (bli_zreal(x[i__4]) != 0. || bli_zimag(x[i__4]) != 0.) 
{ i__4 = jx; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__4]) - bli_zimag(*alpha) * bli_zimag(x[i__4])), (bli_zreal(*alpha) * bli_zimag(x[i__4]) + bli_zimag(*alpha) * bli_zreal(x[i__4])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__4]) - bli_zimag(*alpha) * bli_zimag(x[i__4])), (bli_zreal(*alpha) * bli_zimag(x[i__4]) + bli_zimag(*alpha) * bli_zreal(x[i__4])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); iy = ky; k = kup1 - j; /* Computing MAX */ @@ -1512,9 +1512,9 @@ i__4 = iy; i__2 = iy; i__5 = k + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); iy += *incy; /* L70: */ } @@ -1534,7 +1534,7 @@ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - bli_zsets( (0.), (0.), temp ); + bli_tsets( z,z, (0.), (0.), temp ); k = kup1 - j; if (noconj) { /* Computing MAX */ @@ -1545,9 +1545,9 @@ for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { i__3 = k + i__ + j * a_dim1; i__4 = i__; - bli_zsets( (bli_zreal(a[i__3]) * bli_zreal(x[i__4]) - bli_zimag(a[i__3]) * bli_zimag(x[i__4])), (bli_zreal(a[i__3]) * bli_zimag(x[i__4]) + bli_zimag(a[i__3]) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), 
(bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__3]) * bli_zreal(x[i__4]) - bli_zimag(a[i__3]) * bli_zimag(x[i__4])), (bli_zreal(a[i__3]) * bli_zimag(x[i__4]) + bli_zimag(a[i__3]) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L90: */ } } else { @@ -1559,24 +1559,24 @@ for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { bla_d_cnjg(&z__3, &a[k + i__ + j * a_dim1]); i__2 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L100: */ } } i__4 = jy; i__2 = jy; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp) - bli_zimag(*alpha) * bli_zimag(temp)), (bli_zreal(*alpha) * bli_zimag(temp) + bli_zimag(*alpha) * bli_zreal(temp)), z__2 ); - bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp) - bli_zimag(*alpha) * bli_zimag(temp)), (bli_zreal(*alpha) * bli_zimag(temp) + bli_zimag(*alpha) * bli_zreal(temp)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); jy += *incy; /* L110: */ } } else { i__1 = *n; 
for (j = 1; j <= i__1; ++j) { - bli_zsets( (0.), (0.), temp ); + bli_tsets( z,z, (0.), (0.), temp ); ix = kx; k = kup1 - j; if (noconj) { @@ -1588,9 +1588,9 @@ for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = k + i__ + j * a_dim1; i__2 = ix; - bli_zsets( (bli_zreal(a[i__4]) * bli_zreal(x[i__2]) - bli_zimag(a[i__4]) * bli_zimag(x[i__2])), (bli_zreal(a[i__4]) * bli_zimag(x[i__2]) + bli_zimag(a[i__4]) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__4]) * bli_zreal(x[i__2]) - bli_zimag(a[i__4]) * bli_zimag(x[i__2])), (bli_zreal(a[i__4]) * bli_zimag(x[i__2]) + bli_zimag(a[i__4]) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L120: */ } @@ -1603,18 +1603,18 @@ for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { bla_d_cnjg(&z__3, &a[k + i__ + j * a_dim1]); i__3 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L130: */ } } i__2 = jy; i__3 = jy; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp) - bli_zimag(*alpha) * bli_zimag(temp)), (bli_zreal(*alpha) * bli_zimag(temp) + bli_zimag(*alpha) * 
bli_zreal(temp)), z__2 ); - bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp) - bli_zimag(*alpha) * bli_zimag(temp)), (bli_zreal(*alpha) * bli_zimag(temp) + bli_zimag(*alpha) * bli_zreal(temp)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); jy += *incy; if (j > *ku) { kx += *incx; diff --git a/frame/compat/f2c/bla_hbmv.c b/frame/compat/f2c/bla_hbmv.c index c20a720f92..6c3f45f5f6 100644 --- a/frame/compat/f2c/bla_hbmv.c +++ b/frame/compat/f2c/bla_hbmv.c @@ -254,7 +254,7 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; - bli_csets( (0.f), (0.f), y[i__2] ); + bli_tsets( c,c, (0.f), (0.f), y[i__2] ); /* L10: */ } } else { @@ -262,8 +262,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; - bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); /* L20: */ } } @@ -273,7 +273,7 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; - bli_csets( (0.f), (0.f), y[i__2] ); + bli_tsets( c,c, (0.f), (0.f), y[i__2] ); iy += *incy; /* L30: */ } @@ -282,8 +282,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; - bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); - bli_csets( 
(bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); iy += *incy; /* L40: */ } @@ -302,9 +302,9 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); - bli_csets( (0.f), (0.f), temp2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (0.f), (0.f), temp2 ); l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; @@ -313,25 +313,25 @@ i__2 = i__; i__3 = i__; i__5 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__2 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * 
bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); /* L50: */ } i__4 = j; i__2 = j; i__3 = kplus1 + j * a_dim1; r__1 = bli_creal(a[i__3]); - bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); - bli_csets( (bli_creal(y[i__2]) + bli_creal(q__3)), (bli_cimag(y[i__2]) + bli_cimag(q__3)), q__2 ); - bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); + bli_tsets( c,c, (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); + bli_tsets( c,c, (bli_creal(y[i__2]) + bli_creal(q__3)), (bli_cimag(y[i__2]) + bli_cimag(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); /* L60: */ } } else { @@ -340,9 +340,9 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__4 = jx; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__4]) - bli_cimag(*alpha) * bli_cimag(x[i__4])), (bli_creal(*alpha) * bli_cimag(x[i__4]) + bli_cimag(*alpha) * bli_creal(x[i__4])), q__1 ); - 
bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); - bli_csets( (0.f), (0.f), temp2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__4]) - bli_cimag(*alpha) * bli_cimag(x[i__4])), (bli_creal(*alpha) * bli_cimag(x[i__4]) + bli_cimag(*alpha) * bli_creal(x[i__4])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (0.f), (0.f), temp2 ); ix = kx; iy = ky; l = kplus1 - j; @@ -353,14 +353,14 @@ i__4 = iy; i__2 = iy; i__5 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); + bli_tsets( c,c, (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ix += *incx; iy += *incy; /* L70: */ @@ -369,11 
+369,11 @@ i__4 = jy; i__2 = kplus1 + j * a_dim1; r__1 = bli_creal(a[i__2]); - bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); - bli_csets( (bli_creal(y[i__4]) + bli_creal(q__3)), (bli_cimag(y[i__4]) + bli_cimag(q__3)), q__2 ); - bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + bli_tsets( c,c, (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__3)), (bli_cimag(y[i__4]) + bli_cimag(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); jx += *incx; jy += *incy; if (j > *k) { @@ -391,16 +391,16 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = j; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__3]) - bli_cimag(*alpha) * bli_cimag(x[i__3])), (bli_creal(*alpha) * bli_cimag(x[i__3]) + bli_cimag(*alpha) * bli_creal(x[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); - bli_csets( (0.f), (0.f), temp2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__3]) - bli_cimag(*alpha) * bli_cimag(x[i__3])), (bli_creal(*alpha) * bli_cimag(x[i__3]) + bli_cimag(*alpha) * bli_creal(x[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (0.f), (0.f), temp2 ); i__3 = j; i__4 = j; i__2 = j * a_dim1 + 1; r__1 = bli_creal(a[i__2]); - bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); - bli_csets( (bli_creal(y[i__4]) + 
bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + bli_tsets( c,c, (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; @@ -409,21 +409,21 @@ i__4 = i__; i__2 = i__; i__5 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); + bli_tsets( c,c, (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); /* L90: */ } i__3 = j; i__4 = j; - 
bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); - bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); /* L100: */ } } else { @@ -432,16 +432,16 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = jx; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__3]) - bli_cimag(*alpha) * bli_cimag(x[i__3])), (bli_creal(*alpha) * bli_cimag(x[i__3]) + bli_cimag(*alpha) * bli_creal(x[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); - bli_csets( (0.f), (0.f), temp2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__3]) - bli_cimag(*alpha) * bli_cimag(x[i__3])), (bli_creal(*alpha) * bli_cimag(x[i__3]) + bli_cimag(*alpha) * bli_creal(x[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (0.f), (0.f), temp2 ); i__3 = jy; i__4 = jy; i__2 = j * a_dim1 + 1; r__1 = bli_creal(a[i__2]); - bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); - bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + bli_tsets( c,c, (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); l = 1 - j; ix = jx; iy = jy; @@ -454,21 +454,21 @@ i__4 = iy; 
i__2 = iy; i__5 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); + bli_tsets( c,c, (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); /* L110: */ } i__3 = jy; i__4 = jy; - bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); - bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * 
bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); jx += *incx; jy += *incy; /* L120: */ @@ -700,7 +700,7 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; - bli_zsets( (0.), (0.), y[i__2] ); + bli_tsets( z,z, (0.), (0.), y[i__2] ); /* L10: */ } } else { @@ -708,8 +708,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; - bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); /* L20: */ } } @@ -719,7 +719,7 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; - bli_zsets( (0.), (0.), y[i__2] ); + bli_tsets( z,z, (0.), (0.), y[i__2] ); iy += *incy; /* L30: */ } @@ -728,8 +728,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; - bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); iy += *incy; /* L40: */ } @@ -748,9 +748,9 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * 
bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); - bli_zsets( (0.), (0.), temp2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (0.), (0.), temp2 ); l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; @@ -759,25 +759,25 @@ i__2 = i__; i__3 = i__; i__5 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__2 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + 
bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); /* L50: */ } i__4 = j; i__2 = j; i__3 = kplus1 + j * a_dim1; d__1 = bli_zreal(a[i__3]); - bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); - bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__3)), (bli_zimag(y[i__2]) + bli_zimag(z__3)), z__2 ); - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); + bli_tsets( z,z, (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); + bli_tsets( z,z, (bli_zreal(y[i__2]) + bli_zreal(z__3)), (bli_zimag(y[i__2]) + bli_zimag(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); /* L60: */ } } else { @@ -786,9 +786,9 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__4 = jx; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__4]) - bli_zimag(*alpha) * bli_zimag(x[i__4])), (bli_zreal(*alpha) * bli_zimag(x[i__4]) + bli_zimag(*alpha) * bli_zreal(x[i__4])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); - bli_zsets( (0.), (0.), temp2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__4]) - bli_zimag(*alpha) * bli_zimag(x[i__4])), (bli_zreal(*alpha) * bli_zimag(x[i__4]) + bli_zimag(*alpha) * bli_zreal(x[i__4])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (0.), (0.), temp2 ); ix = kx; iy = ky; l = kplus1 - j; @@ -799,14 +799,14 @@ i__4 = iy; i__2 = iy; i__5 = l + i__ + j * 
a_dim1; - bli_zsets( (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); + bli_tsets( z,z, (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ix += *incx; iy += *incy; /* L70: */ @@ -815,11 +815,11 @@ i__4 = jy; i__2 = kplus1 + j * a_dim1; d__1 = bli_zreal(a[i__2]); - bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__3)), (bli_zimag(y[i__4]) + bli_zimag(z__3)), z__2 ); - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), 
(bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + bli_tsets( z,z, (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__3)), (bli_zimag(y[i__4]) + bli_zimag(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); jx += *incx; jy += *incy; if (j > *k) { @@ -837,16 +837,16 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = j; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__3]) - bli_zimag(*alpha) * bli_zimag(x[i__3])), (bli_zreal(*alpha) * bli_zimag(x[i__3]) + bli_zimag(*alpha) * bli_zreal(x[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); - bli_zsets( (0.), (0.), temp2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__3]) - bli_zimag(*alpha) * bli_zimag(x[i__3])), (bli_zreal(*alpha) * bli_zimag(x[i__3]) + bli_zimag(*alpha) * bli_zreal(x[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (0.), (0.), temp2 ); i__3 = j; i__4 = j; i__2 = j * a_dim1 + 1; d__1 = bli_zreal(a[i__2]); - bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + bli_tsets( z,z, (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; @@ -855,21 +855,21 @@ i__4 = i__; i__2 = i__; 
i__5 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); + bli_tsets( z,z, (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); /* L90: */ } i__3 = j; i__4 = j; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + 
bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); /* L100: */ } } else { @@ -878,16 +878,16 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = jx; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__3]) - bli_zimag(*alpha) * bli_zimag(x[i__3])), (bli_zreal(*alpha) * bli_zimag(x[i__3]) + bli_zimag(*alpha) * bli_zreal(x[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); - bli_zsets( (0.), (0.), temp2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__3]) - bli_zimag(*alpha) * bli_zimag(x[i__3])), (bli_zreal(*alpha) * bli_zimag(x[i__3]) + bli_zimag(*alpha) * bli_zreal(x[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (0.), (0.), temp2 ); i__3 = jy; i__4 = jy; i__2 = j * a_dim1 + 1; d__1 = bli_zreal(a[i__2]); - bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + bli_tsets( z,z, (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); l = 1 - j; ix = jx; iy = jy; @@ -900,21 +900,21 @@ i__4 = iy; i__2 = iy; i__5 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); + bli_tsets( z,z, (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * 
bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); /* L110: */ } i__3 = jy; i__4 = jy; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); jx += *incx; jy += *incy; /* L120: */ diff --git a/frame/compat/f2c/bla_hpmv.c b/frame/compat/f2c/bla_hpmv.c index 7432611571..a2b9ab1ac9 100644 --- a/frame/compat/f2c/bla_hpmv.c +++ b/frame/compat/f2c/bla_hpmv.c @@ -214,7 +214,7 @@ i__1 
= *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; - bli_csets( (0.f), (0.f), y[i__2] ); + bli_tsets( c,c, (0.f), (0.f), y[i__2] ); /* L10: */ } } else { @@ -222,8 +222,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; - bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); /* L20: */ } } @@ -233,7 +233,7 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; - bli_csets( (0.f), (0.f), y[i__2] ); + bli_tsets( c,c, (0.f), (0.f), y[i__2] ); iy += *incy; /* L30: */ } @@ -242,8 +242,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; - bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); iy += *incy; /* L40: */ } @@ -262,23 +262,23 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); - bli_csets( (0.f), (0.f), temp2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), 
(bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (0.f), (0.f), temp2 ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; - bli_csets( (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + bli_tsets( c,c, (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); bla_r_cnjg(&q__3, &ap[k]); i__3 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ++k; /* L50: */ } @@ -286,11 +286,11 @@ i__3 = j; i__4 = kk + j - 1; r__1 = bli_creal(ap[i__4]); - bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__3)), (bli_cimag(y[i__3]) + bli_cimag(q__3)), q__2 
); - bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__3)), (bli_cimag(y[i__3]) + bli_cimag(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); kk += j; /* L60: */ } @@ -300,9 +300,9 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); - bli_csets( (0.f), (0.f), temp2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (0.f), (0.f), temp2 ); ix = kx; iy = ky; i__2 = kk + j - 2; @@ -310,14 +310,14 @@ i__3 = iy; i__4 = iy; i__5 = k; - bli_csets( (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + 
bli_tsets( c,c, (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); bla_r_cnjg(&q__3, &ap[k]); i__3 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ix += *incx; iy += *incy; /* L70: */ @@ -326,11 +326,11 @@ i__3 = jy; i__4 = kk + j - 1; r__1 = bli_creal(ap[i__4]); - bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__3)), (bli_cimag(y[i__3]) + bli_cimag(q__3)), q__2 ); - bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__3)), (bli_cimag(y[i__3]) + bli_cimag(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), 
(bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); jx += *incx; jy += *incy; kk += j; @@ -345,38 +345,38 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); - bli_csets( (0.f), (0.f), temp2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (0.f), (0.f), temp2 ); i__2 = j; i__3 = j; i__4 = kk; r__1 = bli_creal(ap[i__4]); - bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; - bli_csets( (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + bli_tsets( c,c, (bli_creal(temp1) * bli_creal(ap[i__5]) - 
bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); bla_r_cnjg(&q__3, &ap[k]); i__3 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ++k; /* L90: */ } i__2 = j; i__3 = j; - bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); kk += *n - j + 1; /* L100: */ } @@ -386,16 +386,16 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * 
bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); - bli_csets( (0.f), (0.f), temp2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (0.f), (0.f), temp2 ); i__2 = jy; i__3 = jy; i__4 = kk; r__1 = bli_creal(ap[i__4]); - bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); ix = jx; iy = jy; i__2 = kk + *n - j; @@ -405,21 +405,21 @@ i__3 = iy; i__4 = iy; i__5 = k; - bli_csets( (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); - bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); + bli_tsets( c,c, (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); bla_r_cnjg(&q__3, &ap[k]); i__3 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + 
bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); /* L110: */ } i__2 = jy; i__3 = jy; - bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); - bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); + bli_tsets( c,c, (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); jx += *incx; jy += *incy; kk += *n - j + 1; @@ -612,7 +612,7 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; - bli_zsets( (0.), (0.), y[i__2] ); + bli_tsets( z,z, (0.), (0.), y[i__2] ); /* L10: */ } } else { @@ -620,8 +620,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; - bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * 
bli_zreal(y[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); /* L20: */ } } @@ -631,7 +631,7 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; - bli_zsets( (0.), (0.), y[i__2] ); + bli_tsets( z,z, (0.), (0.), y[i__2] ); iy += *incy; /* L30: */ } @@ -640,8 +640,8 @@ for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; - bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); iy += *incy; /* L40: */ } @@ -660,23 +660,23 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); - bli_zsets( (0.), (0.), temp2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (0.), (0.), temp2 ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; - bli_zsets( (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + 
bli_tsets( z,z, (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); bla_d_cnjg(&z__3, &ap[k]); i__3 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ++k; /* L50: */ } @@ -684,11 +684,11 @@ i__3 = j; i__4 = kk + j - 1; d__1 = bli_zreal(ap[i__4]); - bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); - bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__3)), (bli_zimag(y[i__3]) + bli_zimag(z__3)), z__2 ); - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__3)), (bli_zimag(y[i__3]) + bli_zimag(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) 
* bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); kk += j; /* L60: */ } @@ -698,9 +698,9 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); - bli_zsets( (0.), (0.), temp2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (0.), (0.), temp2 ); ix = kx; iy = ky; i__2 = kk + j - 2; @@ -708,14 +708,14 @@ i__3 = iy; i__4 = iy; i__5 = k; - bli_zsets( (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + bli_tsets( z,z, (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); bla_d_cnjg(&z__3, &ap[k]); i__3 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + 
bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ix += *incx; iy += *incy; /* L70: */ @@ -724,11 +724,11 @@ i__3 = jy; i__4 = kk + j - 1; d__1 = bli_zreal(ap[i__4]); - bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); - bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__3)), (bli_zimag(y[i__3]) + bli_zimag(z__3)), z__2 ); - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__3)), (bli_zimag(y[i__3]) + bli_zimag(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); jx += *incx; jy += *incy; kk += j; @@ -743,38 +743,38 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); - bli_zsets( (0.), (0.), temp2 ); + 
bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (0.), (0.), temp2 ); i__2 = j; i__3 = j; i__4 = kk; d__1 = bli_zreal(ap[i__4]); - bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); - bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; - bli_zsets( (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + bli_tsets( z,z, (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); bla_d_cnjg(&z__3, &ap[k]); i__3 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); - bli_zsets( 
(bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ++k; /* L90: */ } i__2 = j; i__3 = j; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); - bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); kk += *n - j + 1; /* L100: */ } @@ -784,16 +784,16 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); - bli_zsets( (0.), (0.), temp2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (0.), (0.), temp2 ); i__2 = jy; i__3 = jy; i__4 = kk; d__1 = bli_zreal(ap[i__4]); - bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); - bli_zsets( (bli_zreal(y[i__3]) + 
bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); ix = jx; iy = jy; i__2 = kk + *n - j; @@ -803,21 +803,21 @@ i__3 = iy; i__4 = iy; i__5 = k; - bli_zsets( (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); - bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); + bli_tsets( z,z, (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); bla_d_cnjg(&z__3, &ap[k]); i__3 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); /* L110: */ } i__2 = jy; i__3 = jy; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) 
- bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); - bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); + bli_tsets( z,z, (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); jx += *incx; jy += *incy; kk += *n - j + 1; diff --git a/frame/compat/f2c/bla_hpr.c b/frame/compat/f2c/bla_hpr.c index 636cefef36..43f2934ed2 100644 --- a/frame/compat/f2c/bla_hpr.c +++ b/frame/compat/f2c/bla_hpr.c @@ -196,31 +196,31 @@ i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { bla_r_cnjg(&q__2, &x[j]); - bli_csets( (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; - bli_csets( (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); - bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); + bli_tsets( c,c, (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); + bli_tsets( c,c, (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); + 
bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ++k; /* L10: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = j; - bli_csets( (bli_creal(x[i__4]) * bli_creal(temp) - bli_cimag(x[i__4]) * bli_cimag(temp)), (bli_creal(x[i__4]) * bli_cimag(temp) + bli_cimag(x[i__4]) * bli_creal(temp)), q__1 ); + bli_tsets( c,c, (bli_creal(x[i__4]) * bli_creal(temp) - bli_cimag(x[i__4]) * bli_cimag(temp)), (bli_creal(x[i__4]) * bli_cimag(temp) + bli_cimag(x[i__4]) * bli_creal(temp)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; r__1 = bli_creal(ap[i__3]); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } kk += j; /* L20: */ @@ -232,31 +232,31 @@ i__2 = jx; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { bla_r_cnjg(&q__2, &x[jx]); - bli_csets( (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = k; i__5 = ix; - bli_csets( (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); - bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); + bli_tsets( c,c, (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); + bli_tsets( c,c, (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), 
ap[i__3] ); ix += *incx; /* L30: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = jx; - bli_csets( (bli_creal(x[i__4]) * bli_creal(temp) - bli_cimag(x[i__4]) * bli_cimag(temp)), (bli_creal(x[i__4]) * bli_cimag(temp) + bli_cimag(x[i__4]) * bli_creal(temp)), q__1 ); + bli_tsets( c,c, (bli_creal(x[i__4]) * bli_creal(temp) - bli_cimag(x[i__4]) * bli_cimag(temp)), (bli_creal(x[i__4]) * bli_cimag(temp) + bli_cimag(x[i__4]) * bli_creal(temp)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; r__1 = bli_creal(ap[i__3]); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } jx += *incx; kk += j; @@ -273,23 +273,23 @@ i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { bla_r_cnjg(&q__2, &x[j]); - bli_csets( (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); i__2 = kk; i__3 = kk; i__4 = j; - bli_csets( (bli_creal(temp) * bli_creal(x[i__4]) - bli_cimag(temp) * bli_cimag(x[i__4])), (bli_creal(temp) * bli_cimag(x[i__4]) + bli_cimag(temp) * bli_creal(x[i__4])), q__1 ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(x[i__4]) - bli_cimag(temp) * bli_cimag(x[i__4])), (bli_creal(temp) * bli_cimag(x[i__4]) + bli_cimag(temp) * bli_creal(x[i__4])), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; - bli_csets( (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); - bli_csets( 
(bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); + bli_tsets( c,c, (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); + bli_tsets( c,c, (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ++k; /* L50: */ } @@ -297,7 +297,7 @@ i__2 = kk; i__3 = kk; r__1 = bli_creal(ap[i__3]); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } kk = kk + *n - j + 1; /* L60: */ @@ -309,14 +309,14 @@ i__2 = jx; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { bla_r_cnjg(&q__2, &x[jx]); - bli_csets( (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); i__2 = kk; i__3 = kk; i__4 = jx; - bli_csets( (bli_creal(temp) * bli_creal(x[i__4]) - bli_cimag(temp) * bli_cimag(x[i__4])), (bli_creal(temp) * bli_cimag(x[i__4]) + bli_cimag(temp) * bli_creal(x[i__4])), q__1 ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(x[i__4]) - bli_cimag(temp) * bli_cimag(x[i__4])), (bli_creal(temp) * bli_cimag(x[i__4]) + bli_cimag(temp) * bli_creal(x[i__4])), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); ix = jx; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { @@ -324,16 +324,16 @@ i__3 = k; i__4 = k; i__5 = ix; - bli_csets( (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); - bli_csets( (bli_creal(ap[i__4]) + 
bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); + bli_tsets( c,c, (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); + bli_tsets( c,c, (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); /* L70: */ } } else { i__2 = kk; i__3 = kk; r__1 = bli_creal(ap[i__3]); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } jx += *incx; kk = kk + *n - j + 1; @@ -508,31 +508,31 @@ i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { bla_d_cnjg(&z__2, &x[j]); - bli_zsets( (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; - bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); - bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); + bli_tsets( z,z, (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); + bli_tsets( z,z, (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ++k; /* L10: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = j; - bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp) - 
bli_zimag(x[i__4]) * bli_zimag(temp)), (bli_zreal(x[i__4]) * bli_zimag(temp) + bli_zimag(x[i__4]) * bli_zreal(temp)), z__1 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) * bli_zreal(temp) - bli_zimag(x[i__4]) * bli_zimag(temp)), (bli_zreal(x[i__4]) * bli_zimag(temp) + bli_zimag(x[i__4]) * bli_zreal(temp)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; d__1 = bli_zreal(ap[i__3]); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } kk += j; /* L20: */ @@ -544,31 +544,31 @@ i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { bla_d_cnjg(&z__2, &x[jx]); - bli_zsets( (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = k; i__5 = ix; - bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); - bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); + bli_tsets( z,z, (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); + bli_tsets( z,z, (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ix += *incx; /* L30: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = jx; - bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp) - bli_zimag(x[i__4]) * bli_zimag(temp)), 
(bli_zreal(x[i__4]) * bli_zimag(temp) + bli_zimag(x[i__4]) * bli_zreal(temp)), z__1 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) * bli_zreal(temp) - bli_zimag(x[i__4]) * bli_zimag(temp)), (bli_zreal(x[i__4]) * bli_zimag(temp) + bli_zimag(x[i__4]) * bli_zreal(temp)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; d__1 = bli_zreal(ap[i__3]); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } jx += *incx; kk += j; @@ -585,23 +585,23 @@ i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { bla_d_cnjg(&z__2, &x[j]); - bli_zsets( (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); i__2 = kk; i__3 = kk; i__4 = j; - bli_zsets( (bli_zreal(temp) * bli_zreal(x[i__4]) - bli_zimag(temp) * bli_zimag(x[i__4])), (bli_zreal(temp) * bli_zimag(x[i__4]) + bli_zimag(temp) * bli_zreal(x[i__4])), z__1 ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(x[i__4]) - bli_zimag(temp) * bli_zimag(x[i__4])), (bli_zreal(temp) * bli_zimag(x[i__4]) + bli_zimag(temp) * bli_zreal(x[i__4])), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; - bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); - bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); + bli_tsets( z,z, (bli_zreal(x[i__5]) * 
bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); + bli_tsets( z,z, (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ++k; /* L50: */ } @@ -609,7 +609,7 @@ i__2 = kk; i__3 = kk; d__1 = bli_zreal(ap[i__3]); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } kk = kk + *n - j + 1; /* L60: */ @@ -621,14 +621,14 @@ i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { bla_d_cnjg(&z__2, &x[jx]); - bli_zsets( (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); i__2 = kk; i__3 = kk; i__4 = jx; - bli_zsets( (bli_zreal(temp) * bli_zreal(x[i__4]) - bli_zimag(temp) * bli_zimag(x[i__4])), (bli_zreal(temp) * bli_zimag(x[i__4]) + bli_zimag(temp) * bli_zreal(x[i__4])), z__1 ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(x[i__4]) - bli_zimag(temp) * bli_zimag(x[i__4])), (bli_zreal(temp) * bli_zimag(x[i__4]) + bli_zimag(temp) * bli_zreal(x[i__4])), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); ix = jx; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { @@ -636,16 +636,16 @@ i__3 = k; i__4 = k; i__5 = ix; - bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); - bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); + bli_tsets( z,z, (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * 
bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); + bli_tsets( z,z, (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); /* L70: */ } } else { i__2 = kk; i__3 = kk; d__1 = bli_zreal(ap[i__3]); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } jx += *incx; kk = kk + *n - j + 1; diff --git a/frame/compat/f2c/bla_hpr2.c b/frame/compat/f2c/bla_hpr2.c index 98ae6b5543..db366fe105 100644 --- a/frame/compat/f2c/bla_hpr2.c +++ b/frame/compat/f2c/bla_hpr2.c @@ -222,41 +222,41 @@ if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f || (bli_creal(y[i__3]) != 0.f || bli_cimag(y[i__3]) != 0.f)) { bla_r_cnjg(&q__2, &y[j]); - bli_csets( (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); i__2 = j; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); bla_r_cnjg(&q__1, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; - bli_csets( (bli_creal(x[i__5]) * bli_creal(temp1) - 
bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); - bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); + bli_tsets( c,c, (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); i__6 = i__; - bli_csets( (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); + bli_tsets( c,c, (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ++k; /* L10: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = j; - bli_csets( (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); i__5 = j; - bli_csets( (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); + bli_tsets( c,c, (bli_creal(y[i__5]) 
* bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; r__1 = bli_creal(ap[i__3]); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } kk += j; /* L20: */ @@ -269,12 +269,12 @@ if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f || (bli_creal(y[i__3]) != 0.f || bli_cimag(y[i__3]) != 0.f)) { bla_r_cnjg(&q__2, &y[jy]); - bli_csets( (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); i__2 = jx; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); bla_r_cnjg(&q__1, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ix = kx; iy = ky; i__2 = kk + j - 2; @@ -282,12 +282,12 @@ i__3 = k; i__4 = k; i__5 = ix; - bli_csets( (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) 
* bli_creal(temp1)), q__3 ); - bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); + bli_tsets( c,c, (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); i__6 = iy; - bli_csets( (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); + bli_tsets( c,c, (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ix += *incx; iy += *incy; /* L30: */ @@ -295,17 +295,17 @@ i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = jx; - bli_csets( (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); i__5 = jy; - bli_csets( (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); + bli_tsets( c,c, (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * 
bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; r__1 = bli_creal(ap[i__3]); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } jx += *incx; jy += *incy; @@ -325,33 +325,33 @@ if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f || (bli_creal(y[i__3]) != 0.f || bli_cimag(y[i__3]) != 0.f)) { bla_r_cnjg(&q__2, &y[j]); - bli_csets( (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); i__2 = j; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); bla_r_cnjg(&q__1, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); i__2 = kk; i__3 = kk; i__4 = j; - bli_csets( (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) * bli_creal(temp1) - 
bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); i__5 = j; - bli_csets( (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); + bli_tsets( c,c, (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; - bli_csets( (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); - bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); + bli_tsets( c,c, (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); i__6 = i__; - bli_csets( (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); + bli_tsets( c,c, (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * 
bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ++k; /* L50: */ } @@ -359,7 +359,7 @@ i__2 = kk; i__3 = kk; r__1 = bli_creal(ap[i__3]); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } kk = kk + *n - j + 1; /* L60: */ @@ -372,21 +372,21 @@ if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f || (bli_creal(y[i__3]) != 0.f || bli_cimag(y[i__3]) != 0.f)) { bla_r_cnjg(&q__2, &y[jy]); - bli_csets( (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); i__2 = jx; - bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); bla_r_cnjg(&q__1, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); i__2 = kk; i__3 = kk; i__4 = jx; - bli_csets( (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + 
bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); i__5 = jy; - bli_csets( (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); + bli_tsets( c,c, (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); ix = jx; iy = jy; i__2 = kk + *n - j; @@ -396,19 +396,19 @@ i__3 = k; i__4 = k; i__5 = ix; - bli_csets( (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); - bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); + bli_tsets( c,c, (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); i__6 = iy; - bli_csets( (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); + bli_tsets( c,c, (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); + bli_tsets( c,c, 
(bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); /* L70: */ } } else { i__2 = kk; i__3 = kk; r__1 = bli_creal(ap[i__3]); - bli_csets( (r__1), (0.f), ap[i__2] ); + bli_tsets( c,c, (r__1), (0.f), ap[i__2] ); } jx += *incx; jy += *incy; @@ -610,41 +610,41 @@ if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0. || (bli_zreal(y[i__3]) != 0. || bli_zimag(y[i__3]) != 0.)) { bla_d_cnjg(&z__2, &y[j]); - bli_zsets( (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); i__2 = j; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); bla_d_cnjg(&z__1, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; - bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); - bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), 
(bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); + bli_tsets( z,z, (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); i__6 = i__; - bli_zsets( (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); + bli_tsets( z,z, (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ++k; /* L10: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = j; - bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); i__5 = j; - bli_zsets( (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); + bli_tsets( z,z, (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); - bli_zsets( (d__1), (0.), 
ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; d__1 = bli_zreal(ap[i__3]); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } kk += j; /* L20: */ @@ -657,12 +657,12 @@ if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0. || (bli_zreal(y[i__3]) != 0. || bli_zimag(y[i__3]) != 0.)) { bla_d_cnjg(&z__2, &y[jy]); - bli_zsets( (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); i__2 = jx; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); bla_d_cnjg(&z__1, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ix = kx; iy = ky; i__2 = kk + j - 2; @@ -670,12 +670,12 @@ i__3 = k; i__4 = k; i__5 = ix; - bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); - bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); + 
bli_tsets( z,z, (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); i__6 = iy; - bli_zsets( (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); + bli_tsets( z,z, (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ix += *incx; iy += *incy; /* L30: */ @@ -683,17 +683,17 @@ i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = jx; - bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); i__5 = jy; - bli_zsets( (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); + bli_tsets( z,z, (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); 
} else { i__2 = kk + j - 1; i__3 = kk + j - 1; d__1 = bli_zreal(ap[i__3]); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } jx += *incx; jy += *incy; @@ -713,33 +713,33 @@ if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0. || (bli_zreal(y[i__3]) != 0. || bli_zimag(y[i__3]) != 0.)) { bla_d_cnjg(&z__2, &y[j]); - bli_zsets( (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); i__2 = j; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); bla_d_cnjg(&z__1, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); i__2 = kk; i__3 = kk; i__4 = j; - bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); i__5 = j; - bli_zsets( (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); - bli_zsets( (bli_zreal(z__2) + 
bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); + bli_tsets( z,z, (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; - bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); - bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); + bli_tsets( z,z, (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); i__6 = i__; - bli_zsets( (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); + bli_tsets( z,z, (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ++k; /* L50: */ } @@ -747,7 +747,7 @@ i__2 = kk; i__3 = kk; d__1 = bli_zreal(ap[i__3]); - bli_zsets( (d__1), (0.), ap[i__2] ); + 
bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } kk = kk + *n - j + 1; /* L60: */ @@ -760,21 +760,21 @@ if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0. || (bli_zreal(y[i__3]) != 0. || bli_zimag(y[i__3]) != 0.)) { bla_d_cnjg(&z__2, &y[jy]); - bli_zsets( (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); i__2 = jx; - bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); bla_d_cnjg(&z__1, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); i__2 = kk; i__3 = kk; i__4 = jx; - bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); i__5 = jy; - bli_zsets( (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); + bli_tsets( z,z, (bli_zreal(y[i__5]) * 
bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); ix = jx; iy = jy; i__2 = kk + *n - j; @@ -784,19 +784,19 @@ i__3 = k; i__4 = k; i__5 = ix; - bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); - bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); + bli_tsets( z,z, (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); i__6 = iy; - bli_zsets( (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); + bli_tsets( z,z, (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); /* L70: */ } } else { i__2 = kk; i__3 = kk; d__1 = bli_zreal(ap[i__3]); - bli_zsets( (d__1), (0.), ap[i__2] ); + bli_tsets( z,z, (d__1), (0.), ap[i__2] ); } jx += *incx; jy += *incy; diff --git a/frame/compat/f2c/bla_rot.c 
b/frame/compat/f2c/bla_rot.c index 0dbd720d21..e120c5e99a 100644 --- a/frame/compat/f2c/bla_rot.c +++ b/frame/compat/f2c/bla_rot.c @@ -221,20 +221,20 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = ix; - bli_csets( (*c__ * bli_creal(cx[i__2])), (*c__ * bli_cimag(cx[i__2])), q__2 ); + bli_tsets( c,c, (*c__ * bli_creal(cx[i__2])), (*c__ * bli_cimag(cx[i__2])), q__2 ); i__3 = iy; - bli_csets( (*s * bli_creal(cy[i__3])), (*s * bli_cimag(cy[i__3])), q__3 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ctemp ); + bli_tsets( c,c, (*s * bli_creal(cy[i__3])), (*s * bli_cimag(cy[i__3])), q__3 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ctemp ); i__2 = iy; i__3 = iy; - bli_csets( (*c__ * bli_creal(cy[i__3])), (*c__ * bli_cimag(cy[i__3])), q__2 ); + bli_tsets( c,c, (*c__ * bli_creal(cy[i__3])), (*c__ * bli_cimag(cy[i__3])), q__2 ); i__4 = ix; - bli_csets( (*s * bli_creal(cx[i__4])), (*s * bli_cimag(cx[i__4])), q__3 ); - bli_csets( (bli_creal(q__2) - bli_creal(q__3)), (bli_cimag(q__2) - bli_cimag(q__3)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), cy[i__2] ); + bli_tsets( c,c, (*s * bli_creal(cx[i__4])), (*s * bli_cimag(cx[i__4])), q__3 ); + bli_tsets( c,c, (bli_creal(q__2) - bli_creal(q__3)), (bli_cimag(q__2) - bli_cimag(q__3)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), cy[i__2] ); i__2 = ix; - bli_csets( (bli_creal(ctemp)), (bli_cimag(ctemp)), cx[i__2] ); + bli_tsets( c,c, (bli_creal(ctemp)), (bli_cimag(ctemp)), cx[i__2] ); ix += *incx; iy += *incy; /* L10: */ @@ -247,20 +247,20 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; - bli_csets( (*c__ * bli_creal(cx[i__2])), (*c__ * bli_cimag(cx[i__2])), q__2 ); + bli_tsets( c,c, (*c__ * bli_creal(cx[i__2])), (*c__ * bli_cimag(cx[i__2])), q__2 ); i__3 = i__; - 
bli_csets( (*s * bli_creal(cy[i__3])), (*s * bli_cimag(cy[i__3])), q__3 ); - bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ctemp ); + bli_tsets( c,c, (*s * bli_creal(cy[i__3])), (*s * bli_cimag(cy[i__3])), q__3 ); + bli_tsets( c,c, (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), ctemp ); i__2 = i__; i__3 = i__; - bli_csets( (*c__ * bli_creal(cy[i__3])), (*c__ * bli_cimag(cy[i__3])), q__2 ); + bli_tsets( c,c, (*c__ * bli_creal(cy[i__3])), (*c__ * bli_cimag(cy[i__3])), q__2 ); i__4 = i__; - bli_csets( (*s * bli_creal(cx[i__4])), (*s * bli_cimag(cx[i__4])), q__3 ); - bli_csets( (bli_creal(q__2) - bli_creal(q__3)), (bli_cimag(q__2) - bli_cimag(q__3)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), cy[i__2] ); + bli_tsets( c,c, (*s * bli_creal(cx[i__4])), (*s * bli_cimag(cx[i__4])), q__3 ); + bli_tsets( c,c, (bli_creal(q__2) - bli_creal(q__3)), (bli_cimag(q__2) - bli_cimag(q__3)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), cy[i__2] ); i__2 = i__; - bli_csets( (bli_creal(ctemp)), (bli_cimag(ctemp)), cx[i__2] ); + bli_tsets( c,c, (bli_creal(ctemp)), (bli_cimag(ctemp)), cx[i__2] ); /* L30: */ } return 0; @@ -314,20 +314,20 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = ix; - bli_zsets( (*c__ * bli_zreal(zx[i__2])), (*c__ * bli_zimag(zx[i__2])), z__2 ); + bli_tsets( z,z, (*c__ * bli_zreal(zx[i__2])), (*c__ * bli_zimag(zx[i__2])), z__2 ); i__3 = iy; - bli_zsets( (*s * bli_zreal(zy[i__3])), (*s * bli_zimag(zy[i__3])), z__3 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ztemp ); + bli_tsets( z,z, (*s * bli_zreal(zy[i__3])), (*s * bli_zimag(zy[i__3])), z__3 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), 
z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ztemp ); i__2 = iy; i__3 = iy; - bli_zsets( (*c__ * bli_zreal(zy[i__3])), (*c__ * bli_zimag(zy[i__3])), z__2 ); + bli_tsets( z,z, (*c__ * bli_zreal(zy[i__3])), (*c__ * bli_zimag(zy[i__3])), z__2 ); i__4 = ix; - bli_zsets( (*s * bli_zreal(zx[i__4])), (*s * bli_zimag(zx[i__4])), z__3 ); - bli_zsets( (bli_zreal(z__2) - bli_zreal(z__3)), (bli_zimag(z__2) - bli_zimag(z__3)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), zy[i__2] ); + bli_tsets( z,z, (*s * bli_zreal(zx[i__4])), (*s * bli_zimag(zx[i__4])), z__3 ); + bli_tsets( z,z, (bli_zreal(z__2) - bli_zreal(z__3)), (bli_zimag(z__2) - bli_zimag(z__3)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), zy[i__2] ); i__2 = ix; - bli_zsets( (bli_zreal(ztemp)), (bli_zimag(ztemp)), zx[i__2] ); + bli_tsets( z,z, (bli_zreal(ztemp)), (bli_zimag(ztemp)), zx[i__2] ); ix += *incx; iy += *incy; /* L10: */ @@ -340,20 +340,20 @@ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; - bli_zsets( (*c__ * bli_zreal(zx[i__2])), (*c__ * bli_zimag(zx[i__2])), z__2 ); + bli_tsets( z,z, (*c__ * bli_zreal(zx[i__2])), (*c__ * bli_zimag(zx[i__2])), z__2 ); i__3 = i__; - bli_zsets( (*s * bli_zreal(zy[i__3])), (*s * bli_zimag(zy[i__3])), z__3 ); - bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ztemp ); + bli_tsets( z,z, (*s * bli_zreal(zy[i__3])), (*s * bli_zimag(zy[i__3])), z__3 ); + bli_tsets( z,z, (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), ztemp ); i__2 = i__; i__3 = i__; - bli_zsets( (*c__ * bli_zreal(zy[i__3])), (*c__ * bli_zimag(zy[i__3])), z__2 ); + bli_tsets( z,z, (*c__ * bli_zreal(zy[i__3])), (*c__ * bli_zimag(zy[i__3])), z__2 ); i__4 = i__; - bli_zsets( (*s * bli_zreal(zx[i__4])), (*s * bli_zimag(zx[i__4])), z__3 ); - bli_zsets( (bli_zreal(z__2) - 
bli_zreal(z__3)), (bli_zimag(z__2) - bli_zimag(z__3)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), zy[i__2] ); + bli_tsets( z,z, (*s * bli_zreal(zx[i__4])), (*s * bli_zimag(zx[i__4])), z__3 ); + bli_tsets( z,z, (bli_zreal(z__2) - bli_zreal(z__3)), (bli_zimag(z__2) - bli_zimag(z__3)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), zy[i__2] ); i__2 = i__; - bli_zsets( (bli_zreal(ztemp)), (bli_zimag(ztemp)), zx[i__2] ); + bli_tsets( z,z, (bli_zreal(ztemp)), (bli_zimag(ztemp)), zx[i__2] ); /* L30: */ } return 0; @@ -433,62 +433,71 @@ cx[i__2].r = stemp.r; cx[i__2].i = stemp.i; #else - bli_csets + bli_tsets ( + c,c, *c__ * bli_creal(cx[i__2]), *c__ * bli_cimag(cx[i__2]), q__2 ); i__3 = iy; - bli_csets + bli_tsets ( + c,c, bli_creal(*s) * bli_creal(cy[i__3]) - bli_cimag(*s) * bli_cimag(cy[i__3]), bli_creal(*s) * bli_cimag(cy[i__3]) + bli_cimag(*s) * bli_creal(cy[i__3]), q__3 ); - bli_csets + bli_tsets ( + c,c, bli_creal(q__2) + bli_creal(q__3), bli_cimag(q__2) + bli_cimag(q__3), q__1 ); - bli_csets + bli_tsets ( + c,c, bli_creal(q__1), bli_cimag(q__1), stemp ); i__2 = iy; i__3 = iy; - bli_csets + bli_tsets ( + c,c, *c__ * bli_creal(cy[i__3]), *c__ * bli_cimag(cy[i__3]), q__2 ); bla_r_cnjg(&q__4, s); i__4 = ix; - bli_csets + bli_tsets ( + c,c, bli_creal(q__4) * bli_creal(cx[i__4]) - bli_cimag(q__4) * bli_cimag(cx[i__4]), bli_creal(q__4) * bli_cimag(cx[i__4]) + bli_cimag(q__4) * bli_creal(cx[i__4]), q__3 ); - bli_csets + bli_tsets ( + c,c, bli_creal(q__2) - bli_creal(q__3), bli_cimag(q__2) - bli_cimag(q__3), q__1 ); - bli_csets + bli_tsets ( + c,c, bli_creal(q__1), bli_cimag(q__1), cy[i__2] ); i__2 = ix; - bli_csets + bli_tsets ( + c,c, bli_creal(stemp), bli_cimag(stemp), cx[i__2] @@ -532,62 +541,71 @@ cx[i__2].r = stemp.r; cx[i__2].i = stemp.i; #else - bli_csets + bli_tsets ( + c,c, *c__ * bli_creal(cx[i__2]), *c__ * bli_cimag(cx[i__2]), q__2 ); i__3 = i__; - bli_csets + bli_tsets ( + c,c, bli_creal(*s) * bli_creal(cy[i__3]) - bli_cimag(*s) 
* bli_cimag(cy[i__3]), bli_creal(*s) * bli_cimag(cy[i__3]) + bli_cimag(*s) * bli_creal(cy[i__3]), q__3 ); - bli_csets + bli_tsets ( + c,c, bli_creal(q__2) + bli_creal(q__3), bli_cimag(q__2) + bli_cimag(q__3), q__1 ); - bli_csets + bli_tsets ( + c,c, bli_creal(q__1), bli_cimag(q__1), stemp ); i__2 = i__; i__3 = i__; - bli_csets + bli_tsets ( + c,c, *c__ * bli_creal(cy[i__3]), *c__ * bli_cimag(cy[i__3]), q__2 ); bla_r_cnjg(&q__4, s); i__4 = i__; - bli_csets + bli_tsets ( + c,c, bli_creal(q__4) * bli_creal(cx[i__4]) - bli_cimag(q__4) * bli_cimag(cx[i__4]), bli_creal(q__4) * bli_cimag(cx[i__4]) + bli_cimag(q__4) * bli_creal(cx[i__4]), q__3 ); - bli_csets + bli_tsets ( + c,c, bli_creal(q__2) - bli_creal(q__3), bli_cimag(q__2) - bli_cimag(q__3), q__1 ); - bli_csets + bli_tsets ( + c,c, bli_creal(q__1), bli_cimag(q__1), cy[i__2] ); i__2 = i__; - bli_csets + bli_tsets ( + c,c, bli_creal(stemp), bli_cimag(stemp), cx[i__2] @@ -672,62 +690,71 @@ cx[i__2].r = stemp.r; cx[i__2].i = stemp.i; #else - bli_zsets + bli_tsets ( + z,z, *c__ * bli_zreal(cx[i__2]), *c__ * bli_zimag(cx[i__2]), z__2 ); i__3 = iy; - bli_zsets + bli_tsets ( + z,z, bli_zreal(*s) * bli_zreal(cy[i__3]) - bli_zimag(*s) * bli_zimag(cy[i__3]), bli_zreal(*s) * bli_zimag(cy[i__3]) + bli_zimag(*s) * bli_zreal(cy[i__3]), z__3 ); - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__2) + bli_zreal(z__3), bli_zimag(z__2) + bli_zimag(z__3), z__1 ); - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__1), bli_zimag(z__1), stemp ); i__2 = iy; i__3 = iy; - bli_zsets + bli_tsets ( + z,z, *c__ * bli_zreal(cy[i__3]), *c__ * bli_zimag(cy[i__3]), z__2 ); bla_d_cnjg(&z__4, s); i__4 = ix; - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__4) * bli_zreal(cx[i__4]) - bli_zimag(z__4) * bli_zimag(cx[i__4]), bli_zreal(z__4) * bli_zimag(cx[i__4]) + bli_zimag(z__4) * bli_zreal(cx[i__4]), z__3 ); - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__2) - bli_zreal(z__3), bli_zimag(z__2) - bli_zimag(z__3), z__1 ); - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__1), 
bli_zimag(z__1), cy[i__2] ); i__2 = ix; - bli_zsets + bli_tsets ( + z,z, bli_zreal(stemp), bli_zimag(stemp), cx[i__2] @@ -771,62 +798,71 @@ cx[i__2].r = stemp.r; cx[i__2].i = stemp.i; #else - bli_zsets + bli_tsets ( + z,z, *c__ * bli_zreal(cx[i__2]), *c__ * bli_zimag(cx[i__2]), z__2 ); i__3 = i__; - bli_zsets + bli_tsets ( + z,z, bli_zreal(*s) * bli_zreal(cy[i__3]) - bli_zimag(*s) * bli_zimag(cy[i__3]), bli_zreal(*s) * bli_zimag(cy[i__3]) + bli_zimag(*s) * bli_zreal(cy[i__3]), z__3 ); - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__2) + bli_zreal(z__3), bli_zimag(z__2) + bli_zimag(z__3), z__1 ); - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__1), bli_zimag(z__1), stemp ); i__2 = i__; i__3 = i__; - bli_zsets + bli_tsets ( + z,z, *c__ * bli_zreal(cy[i__3]), *c__ * bli_zimag(cy[i__3]), z__2 ); bla_d_cnjg(&z__4, s); i__4 = i__; - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__4) * bli_zreal(cx[i__4]) - bli_zimag(z__4) * bli_zimag(cx[i__4]), bli_zreal(z__4) * bli_zimag(cx[i__4]) + bli_zimag(z__4) * bli_zreal(cx[i__4]), z__3 ); - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__2) - bli_zreal(z__3), bli_zimag(z__2) - bli_zimag(z__3), z__1 ); - bli_zsets + bli_tsets ( + z,z, bli_zreal(z__1), bli_zimag(z__1), cy[i__2] ); i__2 = i__; - bli_zsets + bli_tsets ( + z,z, bli_zreal(stemp), bli_zimag(stemp), cx[i__2] diff --git a/frame/compat/f2c/bla_rotg.c b/frame/compat/f2c/bla_rotg.c index 1572689f57..48a40d4732 100644 --- a/frame/compat/f2c/bla_rotg.c +++ b/frame/compat/f2c/bla_rotg.c @@ -180,28 +180,28 @@ static bla_double dc_b4 = 1.; goto L10; } *c__ = 0.f; - bli_csets( 1.f, 0.f, *s ); - bli_csets( bli_creal(*cb), bli_cimag(*cb), *ca ); + bli_tsets( c,c, 1.f, 0.f, *s ); + bli_tsets( c,c, bli_creal(*cb), bli_cimag(*cb), *ca ); goto L20; L10: scale = bla_c_abs(ca) + bla_c_abs(cb); - bli_csets( (bli_creal(*ca) / scale), (bli_cimag(*ca) / scale), q__1 ); + bli_tsets( c,c, (bli_creal(*ca) / scale), (bli_cimag(*ca) / scale), q__1 ); /* Computing 2nd power */ r__1 = bla_c_abs(&q__1); - 
bli_csets( (bli_creal(*cb) / scale), (bli_cimag(*cb) / scale), q__2 ); + bli_tsets( c,c, (bli_creal(*cb) / scale), (bli_cimag(*cb) / scale), q__2 ); /* Computing 2nd power */ r__2 = bla_c_abs(&q__2); norm = scale * sqrt(r__1 * r__1 + r__2 * r__2); r__1 = bla_c_abs(ca); - bli_csets( (bli_creal(*ca) / r__1), (bli_cimag(*ca) / r__1), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), alpha ); + bli_tsets( c,c, (bli_creal(*ca) / r__1), (bli_cimag(*ca) / r__1), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), alpha ); *c__ = bla_c_abs(ca) / norm; bla_r_cnjg(&q__3, cb); - bli_csets( (bli_creal(alpha) * bli_creal(q__3) - bli_cimag(alpha) * bli_cimag(q__3)), (bli_creal(alpha) * bli_cimag(q__3) + bli_cimag(alpha) * bli_creal(q__3)), q__2 ); - bli_csets( (bli_creal(q__2) / norm), (bli_cimag(q__2) / norm), q__1 ); - bli_csets( bli_creal(q__1), bli_cimag(q__1), *s ); - bli_csets( (norm * bli_creal(alpha)), (norm * bli_cimag(alpha)), q__1 ); - bli_csets( bli_creal(q__1), bli_cimag(q__1), *ca ); + bli_tsets( c,c, (bli_creal(alpha) * bli_creal(q__3) - bli_cimag(alpha) * bli_cimag(q__3)), (bli_creal(alpha) * bli_cimag(q__3) + bli_cimag(alpha) * bli_creal(q__3)), q__2 ); + bli_tsets( c,c, (bli_creal(q__2) / norm), (bli_cimag(q__2) / norm), q__1 ); + bli_tsets( c,c, bli_creal(q__1), bli_cimag(q__1), *s ); + bli_tsets( c,c, (norm * bli_creal(alpha)), (norm * bli_cimag(alpha)), q__1 ); + bli_tsets( c,c, bli_creal(q__1), bli_cimag(q__1), *ca ); L20: return 0; } /* crotg_ */ @@ -232,30 +232,30 @@ static bla_double dc_b4 = 1.; goto L10; } *c__ = 0.; - bli_zsets( 1., 0., *s ); - bli_zsets( bli_zreal(*cb), bli_zimag(*cb), *ca ); + bli_tsets( z,z, 1., 0., *s ); + bli_tsets( z,z, bli_zreal(*cb), bli_zimag(*cb), *ca ); goto L20; L10: scale = bla_z_abs(ca) + bla_z_abs(cb); - bli_zsets( (scale), (0.), z__2 ); + bli_tsets( z,z, (scale), (0.), z__2 ); bla_z_div(&z__1, ca, &z__2); /* Computing 2nd power */ d__1 = bla_z_abs(&z__1); - bli_zsets( (scale), (0.), z__4 ); + 
bli_tsets( z,z, (scale), (0.), z__4 ); bla_z_div(&z__3, cb, &z__4); /* Computing 2nd power */ d__2 = bla_z_abs(&z__3); norm = scale * sqrt(d__1 * d__1 + d__2 * d__2); d__1 = bla_z_abs(ca); - bli_zsets( (bli_zreal(*ca) / d__1), (bli_zimag(*ca) / d__1), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), alpha ); + bli_tsets( z,z, (bli_zreal(*ca) / d__1), (bli_zimag(*ca) / d__1), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), alpha ); *c__ = bla_z_abs(ca) / norm; bla_d_cnjg(&z__3, cb); - bli_zsets( (bli_zreal(alpha) * bli_zreal(z__3) - bli_zimag(alpha) * bli_zimag(z__3)), (bli_zreal(alpha) * bli_zimag(z__3) + bli_zimag(alpha) * bli_zreal(z__3)), z__2 ); - bli_zsets( (bli_zreal(z__2) / norm), (bli_zimag(z__2) / norm), z__1 ); - bli_zsets( bli_zreal(z__1), bli_zimag(z__1), *s ); - bli_zsets( (norm * bli_zreal(alpha)), (norm * bli_zimag(alpha)), z__1 ); - bli_zsets( bli_zreal(z__1), bli_zimag(z__1), *ca ); + bli_tsets( z,z, (bli_zreal(alpha) * bli_zreal(z__3) - bli_zimag(alpha) * bli_zimag(z__3)), (bli_zreal(alpha) * bli_zimag(z__3) + bli_zimag(alpha) * bli_zreal(z__3)), z__2 ); + bli_tsets( z,z, (bli_zreal(z__2) / norm), (bli_zimag(z__2) / norm), z__1 ); + bli_tsets( z,z, bli_zreal(z__1), bli_zimag(z__1), *s ); + bli_tsets( z,z, (norm * bli_zreal(alpha)), (norm * bli_zimag(alpha)), z__1 ); + bli_tsets( z,z, bli_zreal(z__1), bli_zimag(z__1), *ca ); L20: return 0; } /* zrotg_ */ diff --git a/frame/compat/f2c/bla_tbmv.c b/frame/compat/f2c/bla_tbmv.c index 16c149c89d..d1d191dd0e 100644 --- a/frame/compat/f2c/bla_tbmv.c +++ b/frame/compat/f2c/bla_tbmv.c @@ -266,7 +266,7 @@ i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { i__2 = j; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; @@ -275,17 +275,17 @@ i__2 = i__; i__3 = i__; i__5 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * 
bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); /* L10: */ } if (nounit) { i__4 = j; i__2 = j; i__3 = kplus1 + j * a_dim1; - bli_csets( (bli_creal(x[i__2]) * bli_creal(a[i__3]) - bli_cimag(x[i__2]) * bli_cimag(a[i__3])), (bli_creal(x[i__2]) * bli_cimag(a[i__3]) + bli_cimag(x[i__2]) * bli_creal(a[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); + bli_tsets( c,c, (bli_creal(x[i__2]) * bli_creal(a[i__3]) - bli_cimag(x[i__2]) * bli_cimag(a[i__3])), (bli_creal(x[i__2]) * bli_cimag(a[i__3]) + bli_cimag(x[i__2]) * bli_creal(a[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); } } /* L20: */ @@ -297,7 +297,7 @@ i__4 = jx; if (bli_creal(x[i__4]) != 0.f || bli_cimag(x[i__4]) != 0.f) { i__4 = jx; - bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); + bli_tsets( c,c, (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); ix = kx; l = kplus1 - j; /* Computing MAX */ @@ -307,9 +307,9 @@ i__4 = ix; i__2 = ix; i__5 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(x[i__2]) + bli_creal(q__2)), (bli_cimag(x[i__2]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); + bli_tsets( c,c, 
(bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__2]) + bli_creal(q__2)), (bli_cimag(x[i__2]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); ix += *incx; /* L30: */ } @@ -317,8 +317,8 @@ i__3 = jx; i__4 = jx; i__2 = kplus1 + j * a_dim1; - bli_csets( (bli_creal(x[i__4]) * bli_creal(a[i__2]) - bli_cimag(x[i__4]) * bli_cimag(a[i__2])), (bli_creal(x[i__4]) * bli_cimag(a[i__2]) + bli_cimag(x[i__4]) * bli_creal(a[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); + bli_tsets( c,c, (bli_creal(x[i__4]) * bli_creal(a[i__2]) - bli_cimag(x[i__4]) * bli_cimag(a[i__2])), (bli_creal(x[i__4]) * bli_cimag(a[i__2]) + bli_cimag(x[i__4]) * bli_creal(a[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); } } jx += *incx; @@ -334,7 +334,7 @@ i__1 = j; if (bli_creal(x[i__1]) != 0.f || bli_cimag(x[i__1]) != 0.f) { i__1 = j; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; @@ -343,17 +343,17 @@ i__1 = i__; i__3 = i__; i__2 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__2]) - bli_cimag(temp) * bli_cimag(a[i__2])), (bli_creal(temp) * bli_cimag(a[i__2]) + bli_cimag(temp) * bli_creal(a[i__2])), q__2 ); - bli_csets( (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__2]) - bli_cimag(temp) * bli_cimag(a[i__2])), (bli_creal(temp) * bli_cimag(a[i__2]) + bli_cimag(temp) * bli_creal(a[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, 
(bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); /* L50: */ } if (nounit) { i__4 = j; i__1 = j; i__3 = j * a_dim1 + 1; - bli_csets( (bli_creal(x[i__1]) * bli_creal(a[i__3]) - bli_cimag(x[i__1]) * bli_cimag(a[i__3])), (bli_creal(x[i__1]) * bli_cimag(a[i__3]) + bli_cimag(x[i__1]) * bli_creal(a[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); + bli_tsets( c,c, (bli_creal(x[i__1]) * bli_creal(a[i__3]) - bli_cimag(x[i__1]) * bli_cimag(a[i__3])), (bli_creal(x[i__1]) * bli_cimag(a[i__3]) + bli_cimag(x[i__1]) * bli_creal(a[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); } } /* L60: */ @@ -365,7 +365,7 @@ i__4 = jx; if (bli_creal(x[i__4]) != 0.f || bli_cimag(x[i__4]) != 0.f) { i__4 = jx; - bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); + bli_tsets( c,c, (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); ix = kx; l = 1 - j; /* Computing MIN */ @@ -375,9 +375,9 @@ i__4 = ix; i__1 = ix; i__2 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__2]) - bli_cimag(temp) * bli_cimag(a[i__2])), (bli_creal(temp) * bli_cimag(a[i__2]) + bli_cimag(temp) * bli_creal(a[i__2])), q__2 ); - bli_csets( (bli_creal(x[i__1]) + bli_creal(q__2)), (bli_cimag(x[i__1]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__2]) - bli_cimag(temp) * bli_cimag(a[i__2])), (bli_creal(temp) * bli_cimag(a[i__2]) + bli_cimag(temp) * bli_creal(a[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__1]) + bli_creal(q__2)), (bli_cimag(x[i__1]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); ix -= *incx; /* L70: */ } @@ -385,8 +385,8 @@ i__3 = jx; i__4 = jx; i__1 = j * a_dim1 + 1; - bli_csets( (bli_creal(x[i__4]) * bli_creal(a[i__1]) - bli_cimag(x[i__4]) * bli_cimag(a[i__1])), (bli_creal(x[i__4]) * bli_cimag(a[i__1]) + bli_cimag(x[i__4]) * bli_creal(a[i__1])), q__1 ); - bli_csets( 
(bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); + bli_tsets( c,c, (bli_creal(x[i__4]) * bli_creal(a[i__1]) - bli_cimag(x[i__4]) * bli_cimag(a[i__1])), (bli_creal(x[i__4]) * bli_cimag(a[i__1]) + bli_cimag(x[i__4]) * bli_creal(a[i__1])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); } } jx -= *incx; @@ -406,13 +406,13 @@ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__3 = j; - bli_csets( (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); + bli_tsets( c,c, (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); l = kplus1 - j; if (noconj) { if (nounit) { i__3 = kplus1 + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__3]) - bli_cimag(temp) * bli_cimag(a[i__3])), (bli_creal(temp) * bli_cimag(a[i__3]) + bli_cimag(temp) * bli_creal(a[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__3]) - bli_cimag(temp) * bli_cimag(a[i__3])), (bli_creal(temp) * bli_cimag(a[i__3]) + bli_cimag(temp) * bli_creal(a[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; @@ -420,16 +420,16 @@ for (i__ = j - 1; i__ >= i__3; --i__) { i__4 = l + i__ + j * a_dim1; i__1 = i__; - bli_csets( (bli_creal(a[i__4]) * bli_creal(x[i__1]) - bli_cimag(a[i__4]) * bli_cimag(x[i__1])), (bli_creal(a[i__4]) * bli_cimag(x[i__1]) + bli_cimag(a[i__4]) * bli_creal(x[i__1])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__4]) * bli_creal(x[i__1]) - bli_cimag(a[i__4]) * bli_cimag(x[i__1])), (bli_creal(a[i__4]) * bli_cimag(x[i__1]) + bli_cimag(a[i__4]) * bli_creal(x[i__1])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L90: */ } } else { if (nounit) { 
bla_r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); - bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; @@ -437,14 +437,14 @@ for (i__ = j - 1; i__ >= i__3; --i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L100: */ } } i__3 = j; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); /* L110: */ } } else { @@ -452,15 +452,15 @@ jx = kx; for (j = *n; j >= 1; --j) { i__3 = jx; - bli_csets( (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); + bli_tsets( c,c, (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); kx -= *incx; ix = kx; l = kplus1 - j; if (noconj) { if (nounit) { i__3 = kplus1 + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__3]) - bli_cimag(temp) * bli_cimag(a[i__3])), (bli_creal(temp) * bli_cimag(a[i__3]) + bli_cimag(temp) * 
bli_creal(a[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__3]) - bli_cimag(temp) * bli_cimag(a[i__3])), (bli_creal(temp) * bli_cimag(a[i__3]) + bli_cimag(temp) * bli_creal(a[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; @@ -468,17 +468,17 @@ for (i__ = j - 1; i__ >= i__3; --i__) { i__4 = l + i__ + j * a_dim1; i__1 = ix; - bli_csets( (bli_creal(a[i__4]) * bli_creal(x[i__1]) - bli_cimag(a[i__4]) * bli_cimag(x[i__1])), (bli_creal(a[i__4]) * bli_cimag(x[i__1]) + bli_cimag(a[i__4]) * bli_creal(x[i__1])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__4]) * bli_creal(x[i__1]) - bli_cimag(a[i__4]) * bli_cimag(x[i__1])), (bli_creal(a[i__4]) * bli_cimag(x[i__1]) + bli_cimag(a[i__4]) * bli_creal(x[i__1])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L120: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); - bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; @@ -486,15 +486,15 @@ for (i__ = j - 1; i__ >= i__3; --i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * 
bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L130: */ } } i__3 = jx; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); jx -= *incx; /* L140: */ } @@ -504,13 +504,13 @@ i__3 = *n; for (j = 1; j <= i__3; ++j) { i__4 = j; - bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); + bli_tsets( c,c, (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); l = 1 - j; if (noconj) { if (nounit) { i__4 = j * a_dim1 + 1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; @@ -518,16 +518,16 @@ for (i__ = j + 1; i__ <= i__4; ++i__) { i__1 = l + i__ + j * a_dim1; i__2 = i__; - bli_csets( (bli_creal(a[i__1]) * bli_creal(x[i__2]) - bli_cimag(a[i__1]) * bli_cimag(x[i__2])), (bli_creal(a[i__1]) * bli_cimag(x[i__2]) + bli_cimag(a[i__1]) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); 
- bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__1]) * bli_creal(x[i__2]) - bli_cimag(a[i__1]) * bli_cimag(x[i__2])), (bli_creal(a[i__1]) * bli_cimag(x[i__2]) + bli_cimag(a[i__1]) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L150: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &a[j * a_dim1 + 1]); - bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; @@ -535,14 +535,14 @@ for (i__ = j + 1; i__ <= i__4; ++i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__1 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L160: */ } } i__4 = j; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); /* L170: */ } } else { @@ -550,15 
+550,15 @@ i__3 = *n; for (j = 1; j <= i__3; ++j) { i__4 = jx; - bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); + bli_tsets( c,c, (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); kx += *incx; ix = kx; l = 1 - j; if (noconj) { if (nounit) { i__4 = j * a_dim1 + 1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; @@ -566,17 +566,17 @@ for (i__ = j + 1; i__ <= i__4; ++i__) { i__1 = l + i__ + j * a_dim1; i__2 = ix; - bli_csets( (bli_creal(a[i__1]) * bli_creal(x[i__2]) - bli_cimag(a[i__1]) * bli_cimag(x[i__2])), (bli_creal(a[i__1]) * bli_cimag(x[i__2]) + bli_cimag(a[i__1]) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__1]) * bli_creal(x[i__2]) - bli_cimag(a[i__1]) * bli_cimag(x[i__2])), (bli_creal(a[i__1]) * bli_cimag(x[i__2]) + bli_cimag(a[i__1]) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L180: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &a[j * a_dim1 + 1]); - bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(q__2) - 
bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; @@ -584,15 +584,15 @@ for (i__ = j + 1; i__ <= i__4; ++i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__1 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L190: */ } } i__4 = jx; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); jx += *incx; /* L200: */ } @@ -1658,7 +1658,7 @@ i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ i__2 = j; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; @@ -1667,17 +1667,17 @@ i__2 = i__; i__3 = i__; i__5 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); /* L10: */ } if (nounit) { i__4 = j; i__2 = j; i__3 = kplus1 + j * a_dim1; - bli_zsets( (bli_zreal(x[i__2]) * bli_zreal(a[i__3]) - bli_zimag(x[i__2]) * bli_zimag(a[i__3])), (bli_zreal(x[i__2]) * bli_zimag(a[i__3]) + bli_zimag(x[i__2]) * bli_zreal(a[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); + bli_tsets( z,z, (bli_zreal(x[i__2]) * bli_zreal(a[i__3]) - bli_zimag(x[i__2]) * bli_zimag(a[i__3])), (bli_zreal(x[i__2]) * bli_zimag(a[i__3]) + bli_zimag(x[i__2]) * bli_zreal(a[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); } } /* L20: */ @@ -1689,7 +1689,7 @@ i__4 = jx; if (bli_zreal(x[i__4]) != 0. || bli_zimag(x[i__4]) != 0.) 
{ i__4 = jx; - bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); ix = kx; l = kplus1 - j; /* Computing MAX */ @@ -1699,9 +1699,9 @@ i__4 = ix; i__2 = ix; i__5 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(x[i__2]) + bli_zreal(z__2)), (bli_zimag(x[i__2]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__2]) + bli_zreal(z__2)), (bli_zimag(x[i__2]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); ix += *incx; /* L30: */ } @@ -1709,8 +1709,8 @@ i__3 = jx; i__4 = jx; i__2 = kplus1 + j * a_dim1; - bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(a[i__2]) - bli_zimag(x[i__4]) * bli_zimag(a[i__2])), (bli_zreal(x[i__4]) * bli_zimag(a[i__2]) + bli_zimag(x[i__4]) * bli_zreal(a[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(x[i__4]) * bli_zreal(a[i__2]) - bli_zimag(x[i__4]) * bli_zimag(a[i__2])), (bli_zreal(x[i__4]) * bli_zimag(a[i__2]) + bli_zimag(x[i__4]) * bli_zreal(a[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); } } jx += *incx; @@ -1726,7 +1726,7 @@ i__1 = j; if (bli_zreal(x[i__1]) != 0. || bli_zimag(x[i__1]) != 0.) 
{ i__1 = j; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; @@ -1735,17 +1735,17 @@ i__1 = i__; i__3 = i__; i__2 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__2]) - bli_zimag(temp) * bli_zimag(a[i__2])), (bli_zreal(temp) * bli_zimag(a[i__2]) + bli_zimag(temp) * bli_zreal(a[i__2])), z__2 ); - bli_zsets( (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__2]) - bli_zimag(temp) * bli_zimag(a[i__2])), (bli_zreal(temp) * bli_zimag(a[i__2]) + bli_zimag(temp) * bli_zreal(a[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); /* L50: */ } if (nounit) { i__4 = j; i__1 = j; i__3 = j * a_dim1 + 1; - bli_zsets( (bli_zreal(x[i__1]) * bli_zreal(a[i__3]) - bli_zimag(x[i__1]) * bli_zimag(a[i__3])), (bli_zreal(x[i__1]) * bli_zimag(a[i__3]) + bli_zimag(x[i__1]) * bli_zreal(a[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); + bli_tsets( z,z, (bli_zreal(x[i__1]) * bli_zreal(a[i__3]) - bli_zimag(x[i__1]) * bli_zimag(a[i__3])), (bli_zreal(x[i__1]) * bli_zimag(a[i__3]) + bli_zimag(x[i__1]) * bli_zreal(a[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); } } /* L60: */ @@ -1757,7 +1757,7 @@ i__4 = jx; if (bli_zreal(x[i__4]) != 0. || bli_zimag(x[i__4]) != 0.) 
{ i__4 = jx; - bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); ix = kx; l = 1 - j; /* Computing MIN */ @@ -1767,9 +1767,9 @@ i__4 = ix; i__1 = ix; i__2 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__2]) - bli_zimag(temp) * bli_zimag(a[i__2])), (bli_zreal(temp) * bli_zimag(a[i__2]) + bli_zimag(temp) * bli_zreal(a[i__2])), z__2 ); - bli_zsets( (bli_zreal(x[i__1]) + bli_zreal(z__2)), (bli_zimag(x[i__1]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__2]) - bli_zimag(temp) * bli_zimag(a[i__2])), (bli_zreal(temp) * bli_zimag(a[i__2]) + bli_zimag(temp) * bli_zreal(a[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__1]) + bli_zreal(z__2)), (bli_zimag(x[i__1]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); ix -= *incx; /* L70: */ } @@ -1777,8 +1777,8 @@ i__3 = jx; i__4 = jx; i__1 = j * a_dim1 + 1; - bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(a[i__1]) - bli_zimag(x[i__4]) * bli_zimag(a[i__1])), (bli_zreal(x[i__4]) * bli_zimag(a[i__1]) + bli_zimag(x[i__4]) * bli_zreal(a[i__1])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(x[i__4]) * bli_zreal(a[i__1]) - bli_zimag(x[i__4]) * bli_zimag(a[i__1])), (bli_zreal(x[i__4]) * bli_zimag(a[i__1]) + bli_zimag(x[i__4]) * bli_zreal(a[i__1])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); } } jx -= *incx; @@ -1798,13 +1798,13 @@ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__3 = j; - bli_zsets( (bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); l = kplus1 - j; if (noconj) { if (nounit) { i__3 = kplus1 + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__3]) - bli_zimag(temp) * bli_zimag(a[i__3])), (bli_zreal(temp) * 
bli_zimag(a[i__3]) + bli_zimag(temp) * bli_zreal(a[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__3]) - bli_zimag(temp) * bli_zimag(a[i__3])), (bli_zreal(temp) * bli_zimag(a[i__3]) + bli_zimag(temp) * bli_zreal(a[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; @@ -1812,16 +1812,16 @@ for (i__ = j - 1; i__ >= i__3; --i__) { i__4 = l + i__ + j * a_dim1; i__1 = i__; - bli_zsets( (bli_zreal(a[i__4]) * bli_zreal(x[i__1]) - bli_zimag(a[i__4]) * bli_zimag(x[i__1])), (bli_zreal(a[i__4]) * bli_zimag(x[i__1]) + bli_zimag(a[i__4]) * bli_zreal(x[i__1])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__4]) * bli_zreal(x[i__1]) - bli_zimag(a[i__4]) * bli_zimag(x[i__1])), (bli_zreal(a[i__4]) * bli_zimag(x[i__1]) + bli_zimag(a[i__4]) * bli_zreal(x[i__1])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L90: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); - bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; @@ -1829,14 +1829,14 @@ for (i__ = j - 1; i__ >= i__3; --i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - 
bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L100: */ } } i__3 = j; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); /* L110: */ } } else { @@ -1844,15 +1844,15 @@ jx = kx; for (j = *n; j >= 1; --j) { i__3 = jx; - bli_zsets( (bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); kx -= *incx; ix = kx; l = kplus1 - j; if (noconj) { if (nounit) { i__3 = kplus1 + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__3]) - bli_zimag(temp) * bli_zimag(a[i__3])), (bli_zreal(temp) * bli_zimag(a[i__3]) + bli_zimag(temp) * bli_zreal(a[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__3]) - bli_zimag(temp) * bli_zimag(a[i__3])), (bli_zreal(temp) * bli_zimag(a[i__3]) + bli_zimag(temp) * bli_zreal(a[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; @@ -1860,17 +1860,17 @@ for (i__ = j - 1; i__ >= i__3; --i__) { i__4 = l + i__ + j * a_dim1; i__1 = ix; - bli_zsets( (bli_zreal(a[i__4]) * bli_zreal(x[i__1]) - bli_zimag(a[i__4]) * bli_zimag(x[i__1])), (bli_zreal(a[i__4]) * bli_zimag(x[i__1]) + bli_zimag(a[i__4]) * bli_zreal(x[i__1])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), 
(bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__4]) * bli_zreal(x[i__1]) - bli_zimag(a[i__4]) * bli_zimag(x[i__1])), (bli_zreal(a[i__4]) * bli_zimag(x[i__1]) + bli_zimag(a[i__4]) * bli_zreal(x[i__1])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L120: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); - bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; @@ -1878,15 +1878,15 @@ for (i__ = j - 1; i__ >= i__3; --i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L130: */ } } i__3 = jx; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); + bli_tsets( z,z, 
(bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); jx -= *incx; /* L140: */ } @@ -1896,13 +1896,13 @@ i__3 = *n; for (j = 1; j <= i__3; ++j) { i__4 = j; - bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); l = 1 - j; if (noconj) { if (nounit) { i__4 = j * a_dim1 + 1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; @@ -1910,16 +1910,16 @@ for (i__ = j + 1; i__ <= i__4; ++i__) { i__1 = l + i__ + j * a_dim1; i__2 = i__; - bli_zsets( (bli_zreal(a[i__1]) * bli_zreal(x[i__2]) - bli_zimag(a[i__1]) * bli_zimag(x[i__2])), (bli_zreal(a[i__1]) * bli_zimag(x[i__2]) + bli_zimag(a[i__1]) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__1]) * bli_zreal(x[i__2]) - bli_zimag(a[i__1]) * bli_zimag(x[i__2])), (bli_zreal(a[i__1]) * bli_zimag(x[i__2]) + bli_zimag(a[i__1]) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L150: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &a[j * a_dim1 + 1]); - bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + 
bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; @@ -1927,14 +1927,14 @@ for (i__ = j + 1; i__ <= i__4; ++i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__1 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L160: */ } } i__4 = j; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); /* L170: */ } } else { @@ -1942,15 +1942,15 @@ i__3 = *n; for (j = 1; j <= i__3; ++j) { i__4 = jx; - bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); kx += *incx; ix = kx; l = 1 - j; if (noconj) { if (nounit) { i__4 = j * a_dim1 + 1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__1 ); + bli_tsets( z,z, 
(bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; @@ -1958,17 +1958,17 @@ for (i__ = j + 1; i__ <= i__4; ++i__) { i__1 = l + i__ + j * a_dim1; i__2 = ix; - bli_zsets( (bli_zreal(a[i__1]) * bli_zreal(x[i__2]) - bli_zimag(a[i__1]) * bli_zimag(x[i__2])), (bli_zreal(a[i__1]) * bli_zimag(x[i__2]) + bli_zimag(a[i__1]) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__1]) * bli_zreal(x[i__2]) - bli_zimag(a[i__1]) * bli_zimag(x[i__2])), (bli_zreal(a[i__1]) * bli_zimag(x[i__2]) + bli_zimag(a[i__1]) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L180: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &a[j * a_dim1 + 1]); - bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; @@ -1976,15 +1976,15 @@ for (i__ = j + 1; i__ <= i__4; ++i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__1 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * 
bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L190: */ } } i__4 = jx; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); jx += *incx; /* L200: */ } diff --git a/frame/compat/f2c/bla_tbsv.c b/frame/compat/f2c/bla_tbsv.c index b237556f80..9f58c0adb8 100644 --- a/frame/compat/f2c/bla_tbsv.c +++ b/frame/compat/f2c/bla_tbsv.c @@ -272,10 +272,10 @@ if (nounit) { i__1 = j; bla_c_div(&q__1, &x[j], &a[kplus1 + j * a_dim1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } i__1 = j; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); @@ -283,9 +283,9 @@ i__2 = i__; i__3 = i__; i__4 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__2 ); - bli_csets( (bli_creal(x[i__3]) - bli_creal(q__2)), (bli_cimag(x[i__3]) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__3]) - bli_creal(q__2)), (bli_cimag(x[i__3]) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); /* L10: */ } } @@ -303,10 +303,10 @@ if (nounit) { i__1 = jx; bla_c_div(&q__1, &x[jx], &a[kplus1 + j 
* a_dim1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } i__1 = jx; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); @@ -314,9 +314,9 @@ i__2 = ix; i__3 = ix; i__4 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__2 ); - bli_csets( (bli_creal(x[i__3]) - bli_creal(q__2)), (bli_cimag(x[i__3]) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__3]) - bli_creal(q__2)), (bli_cimag(x[i__3]) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); ix -= *incx; /* L30: */ } @@ -335,10 +335,10 @@ if (nounit) { i__2 = j; bla_c_div(&q__1, &x[j], &a[j * a_dim1 + 1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } i__2 = j; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); @@ -346,9 +346,9 @@ i__3 = i__; i__4 = i__; i__5 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), 
(bli_cimag(q__1)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); /* L50: */ } } @@ -366,10 +366,10 @@ if (nounit) { i__2 = jx; bla_c_div(&q__1, &x[jx], &a[j * a_dim1 + 1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } i__2 = jx; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); @@ -377,9 +377,9 @@ i__3 = ix; i__4 = ix; i__5 = l + i__ + j * a_dim1; - bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); - bli_csets( (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); ix += *incx; /* L70: */ } @@ -399,7 +399,7 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); l = kplus1 - j; if (noconj) { /* Computing MAX */ @@ -408,14 +408,14 @@ for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = l + i__ + j * 
a_dim1; i__3 = i__; - bli_csets( (bli_creal(a[i__2]) * bli_creal(x[i__3]) - bli_cimag(a[i__2]) * bli_cimag(x[i__3])), (bli_creal(a[i__2]) * bli_cimag(x[i__3]) + bli_cimag(a[i__2]) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__2]) * bli_creal(x[i__3]) - bli_cimag(a[i__2]) * bli_cimag(x[i__3])), (bli_creal(a[i__2]) * bli_cimag(x[i__3]) + bli_cimag(a[i__2]) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L90: */ } if (nounit) { bla_c_div(&q__1, &temp, &a[kplus1 + j * a_dim1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { /* Computing MAX */ @@ -424,19 +424,19 @@ for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L100: */ } if (nounit) { bla_r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); bla_c_div(&q__1, &temp, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } 
i__3 = j; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); /* L110: */ } } else { @@ -444,7 +444,7 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = jx; - bli_csets( (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); + bli_tsets( c,c, (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); ix = kx; l = kplus1 - j; if (noconj) { @@ -454,15 +454,15 @@ for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { i__3 = l + i__ + j * a_dim1; i__4 = ix; - bli_csets( (bli_creal(a[i__3]) * bli_creal(x[i__4]) - bli_cimag(a[i__3]) * bli_cimag(x[i__4])), (bli_creal(a[i__3]) * bli_cimag(x[i__4]) + bli_cimag(a[i__3]) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__3]) * bli_creal(x[i__4]) - bli_cimag(a[i__3]) * bli_cimag(x[i__4])), (bli_creal(a[i__3]) * bli_cimag(x[i__4]) + bli_cimag(a[i__3]) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L120: */ } if (nounit) { bla_c_div(&q__1, &temp, &a[kplus1 + j * a_dim1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { /* Computing MAX */ @@ -471,20 +471,20 @@ for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__2 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__2]) - 
bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L130: */ } if (nounit) { bla_r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); bla_c_div(&q__1, &temp, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__4 = jx; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); jx += *incx; if (j > *k) { kx += *incx; @@ -496,7 +496,7 @@ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); l = 1 - j; if (noconj) { /* Computing MIN */ @@ -505,14 +505,14 @@ for (i__ = f2c_min(i__1,i__4); i__ >= i__2; --i__) { i__1 = l + i__ + j * a_dim1; i__4 = i__; - bli_csets( (bli_creal(a[i__1]) * bli_creal(x[i__4]) - bli_cimag(a[i__1]) * bli_cimag(x[i__4])), (bli_creal(a[i__1]) * bli_cimag(x[i__4]) + bli_cimag(a[i__1]) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__1]) * bli_creal(x[i__4]) - bli_cimag(a[i__1]) * bli_cimag(x[i__4])), (bli_creal(a[i__1]) * bli_cimag(x[i__4]) + bli_cimag(a[i__1]) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L150: */ } if (nounit) { bla_c_div(&q__1, &temp, &a[j * a_dim1 + 1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { 
/* Computing MIN */ @@ -521,19 +521,19 @@ for (i__ = f2c_min(i__2,i__1); i__ >= i__4; --i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__2 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L160: */ } if (nounit) { bla_r_cnjg(&q__2, &a[j * a_dim1 + 1]); bla_c_div(&q__1, &temp, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__4 = j; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); /* L170: */ } } else { @@ -541,7 +541,7 @@ jx = kx; for (j = *n; j >= 1; --j) { i__4 = jx; - bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); + bli_tsets( c,c, (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); ix = kx; l = 1 - j; if (noconj) { @@ -551,15 +551,15 @@ for (i__ = f2c_min(i__4,i__2); i__ >= i__1; --i__) { i__4 = l + i__ + j * a_dim1; i__2 = ix; - bli_csets( (bli_creal(a[i__4]) * bli_creal(x[i__2]) - bli_cimag(a[i__4]) * bli_cimag(x[i__2])), (bli_creal(a[i__4]) * bli_cimag(x[i__2]) + bli_cimag(a[i__4]) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(a[i__4]) * bli_creal(x[i__2]) - bli_cimag(a[i__4]) * 
bli_cimag(x[i__2])), (bli_creal(a[i__4]) * bli_cimag(x[i__2]) + bli_cimag(a[i__4]) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L180: */ } if (nounit) { bla_c_div(&q__1, &temp, &a[j * a_dim1 + 1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { /* Computing MIN */ @@ -568,20 +568,20 @@ for (i__ = f2c_min(i__1,i__4); i__ >= i__2; --i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__1 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L190: */ } if (nounit) { bla_r_cnjg(&q__2, &a[j * a_dim1 + 1]); bla_c_div(&q__1, &temp, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__2 = jx; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); jx -= *incx; if (*n - j >= *k) { kx -= *incx; @@ -1665,10 +1665,10 @@ if (nounit) { i__1 = j; bla_z_div(&z__1, &x[j], &a[kplus1 + j * a_dim1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } i__1 = j; - 
bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); @@ -1676,9 +1676,9 @@ i__2 = i__; i__3 = i__; i__4 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__2 ); - bli_zsets( (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); /* L10: */ } } @@ -1696,10 +1696,10 @@ if (nounit) { i__1 = jx; bla_z_div(&z__1, &x[jx], &a[kplus1 + j * a_dim1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } i__1 = jx; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); @@ -1707,9 +1707,9 @@ i__2 = ix; i__3 = ix; i__4 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__2 ); - bli_zsets( (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + 
bli_zimag(temp) * bli_zreal(a[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); ix -= *incx; /* L30: */ } @@ -1728,10 +1728,10 @@ if (nounit) { i__2 = j; bla_z_div(&z__1, &x[j], &a[j * a_dim1 + 1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } i__2 = j; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); @@ -1739,9 +1739,9 @@ i__3 = i__; i__4 = i__; i__5 = l + i__ + j * a_dim1; - bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); /* L50: */ } } @@ -1759,10 +1759,10 @@ if (nounit) { i__2 = jx; bla_z_div(&z__1, &x[jx], &a[j * a_dim1 + 1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } i__2 = jx; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); @@ -1770,9 +1770,9 @@ i__3 = ix; i__4 = ix; i__5 = l + i__ + j * a_dim1; - 
bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); - bli_zsets( (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); ix += *incx; /* L70: */ } @@ -1792,7 +1792,7 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); l = kplus1 - j; if (noconj) { /* Computing MAX */ @@ -1801,14 +1801,14 @@ for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = l + i__ + j * a_dim1; i__3 = i__; - bli_zsets( (bli_zreal(a[i__2]) * bli_zreal(x[i__3]) - bli_zimag(a[i__2]) * bli_zimag(x[i__3])), (bli_zreal(a[i__2]) * bli_zimag(x[i__3]) + bli_zimag(a[i__2]) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__2]) * bli_zreal(x[i__3]) - bli_zimag(a[i__2]) * bli_zimag(x[i__3])), (bli_zreal(a[i__2]) * bli_zimag(x[i__3]) + bli_zimag(a[i__2]) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L90: */ } if (nounit) { bla_z_div(&z__1, &temp, &a[kplus1 + j * a_dim1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), 
temp ); } } else { /* Computing MAX */ @@ -1817,19 +1817,19 @@ for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L100: */ } if (nounit) { bla_d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); bla_z_div(&z__1, &temp, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__3 = j; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); /* L110: */ } } else { @@ -1837,7 +1837,7 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = jx; - bli_zsets( (bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); ix = kx; l = kplus1 - j; if (noconj) { @@ -1847,15 +1847,15 @@ for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { i__3 = l + i__ + j * a_dim1; i__4 = ix; - bli_zsets( (bli_zreal(a[i__3]) * bli_zreal(x[i__4]) - bli_zimag(a[i__3]) * bli_zimag(x[i__4])), (bli_zreal(a[i__3]) * bli_zimag(x[i__4]) + bli_zimag(a[i__3]) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__3]) * 
bli_zreal(x[i__4]) - bli_zimag(a[i__3]) * bli_zimag(x[i__4])), (bli_zreal(a[i__3]) * bli_zimag(x[i__4]) + bli_zimag(a[i__3]) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L120: */ } if (nounit) { bla_z_div(&z__1, &temp, &a[kplus1 + j * a_dim1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { /* Computing MAX */ @@ -1864,20 +1864,20 @@ for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__2 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L130: */ } if (nounit) { bla_d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); bla_z_div(&z__1, &temp, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__4 = jx; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); jx += *incx; if (j > *k) { kx += *incx; @@ -1889,7 +1889,7 @@ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), 
(bli_zimag(x[i__1])), temp ); l = 1 - j; if (noconj) { /* Computing MIN */ @@ -1898,14 +1898,14 @@ for (i__ = f2c_min(i__1,i__4); i__ >= i__2; --i__) { i__1 = l + i__ + j * a_dim1; i__4 = i__; - bli_zsets( (bli_zreal(a[i__1]) * bli_zreal(x[i__4]) - bli_zimag(a[i__1]) * bli_zimag(x[i__4])), (bli_zreal(a[i__1]) * bli_zimag(x[i__4]) + bli_zimag(a[i__1]) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__1]) * bli_zreal(x[i__4]) - bli_zimag(a[i__1]) * bli_zimag(x[i__4])), (bli_zreal(a[i__1]) * bli_zimag(x[i__4]) + bli_zimag(a[i__1]) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L150: */ } if (nounit) { bla_z_div(&z__1, &temp, &a[j * a_dim1 + 1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { /* Computing MIN */ @@ -1914,19 +1914,19 @@ for (i__ = f2c_min(i__2,i__1); i__ >= i__4; --i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__2 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L160: */ } if (nounit) { bla_d_cnjg(&z__2, &a[j * a_dim1 
+ 1]); bla_z_div(&z__1, &temp, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__4 = j; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); /* L170: */ } } else { @@ -1934,7 +1934,7 @@ jx = kx; for (j = *n; j >= 1; --j) { i__4 = jx; - bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); ix = kx; l = 1 - j; if (noconj) { @@ -1944,15 +1944,15 @@ for (i__ = f2c_min(i__4,i__2); i__ >= i__1; --i__) { i__4 = l + i__ + j * a_dim1; i__2 = ix; - bli_zsets( (bli_zreal(a[i__4]) * bli_zreal(x[i__2]) - bli_zimag(a[i__4]) * bli_zimag(x[i__2])), (bli_zreal(a[i__4]) * bli_zimag(x[i__2]) + bli_zimag(a[i__4]) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(a[i__4]) * bli_zreal(x[i__2]) - bli_zimag(a[i__4]) * bli_zimag(x[i__2])), (bli_zreal(a[i__4]) * bli_zimag(x[i__2]) + bli_zimag(a[i__4]) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L180: */ } if (nounit) { bla_z_div(&z__1, &temp, &a[j * a_dim1 + 1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { /* Computing MIN */ @@ -1961,20 +1961,20 @@ for (i__ = f2c_min(i__1,i__4); i__ >= i__2; --i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__1 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), 
(bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L190: */ } if (nounit) { bla_d_cnjg(&z__2, &a[j * a_dim1 + 1]); bla_z_div(&z__1, &temp, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__2 = jx; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); jx -= *incx; if (*n - j >= *k) { kx -= *incx; diff --git a/frame/compat/f2c/bla_tpmv.c b/frame/compat/f2c/bla_tpmv.c index 853f30156f..f4bc0adee1 100644 --- a/frame/compat/f2c/bla_tpmv.c +++ b/frame/compat/f2c/bla_tpmv.c @@ -220,16 +220,16 @@ i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { i__2 = j; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); - bli_csets( (bli_creal(x[i__4]) + bli_creal(q__2)), (bli_cimag(x[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) + bli_creal(q__2)), (bli_cimag(x[i__4]) + 
bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); ++k; /* L10: */ } @@ -237,8 +237,8 @@ i__2 = j; i__3 = j; i__4 = kk + j - 1; - bli_csets( (bli_creal(x[i__3]) * bli_creal(ap[i__4]) - bli_cimag(x[i__3]) * bli_cimag(ap[i__4])), (bli_creal(x[i__3]) * bli_cimag(ap[i__4]) + bli_cimag(x[i__3]) * bli_creal(ap[i__4])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(x[i__3]) * bli_creal(ap[i__4]) - bli_cimag(x[i__3]) * bli_cimag(ap[i__4])), (bli_creal(x[i__3]) * bli_cimag(ap[i__4]) + bli_cimag(x[i__3]) * bli_creal(ap[i__4])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } } kk += j; @@ -251,16 +251,16 @@ i__2 = jx; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { i__2 = jx; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = ix; i__4 = ix; i__5 = k; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); - bli_csets( (bli_creal(x[i__4]) + bli_creal(q__2)), (bli_cimag(x[i__4]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) + bli_creal(q__2)), (bli_cimag(x[i__4]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); ix += *incx; /* L30: */ } @@ -268,8 +268,8 @@ i__2 = jx; i__3 = jx; i__4 = kk + j - 1; - bli_csets( (bli_creal(x[i__3]) * bli_creal(ap[i__4]) - bli_cimag(x[i__3]) * bli_cimag(ap[i__4])), (bli_creal(x[i__3]) * bli_cimag(ap[i__4]) + bli_cimag(x[i__3]) * 
bli_creal(ap[i__4])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(x[i__3]) * bli_creal(ap[i__4]) - bli_cimag(x[i__3]) * bli_cimag(ap[i__4])), (bli_creal(x[i__3]) * bli_cimag(ap[i__4]) + bli_cimag(x[i__3]) * bli_creal(ap[i__4])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } } jx += *incx; @@ -284,16 +284,16 @@ i__1 = j; if (bli_creal(x[i__1]) != 0.f || bli_cimag(x[i__1]) != 0.f) { i__1 = j; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); k = kk; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { i__2 = i__; i__3 = i__; i__4 = k; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__4]) - bli_cimag(temp) * bli_cimag(ap[i__4])), (bli_creal(temp) * bli_cimag(ap[i__4]) + bli_cimag(temp) * bli_creal(ap[i__4])), q__2 ); - bli_csets( (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__4]) - bli_cimag(temp) * bli_cimag(ap[i__4])), (bli_creal(temp) * bli_cimag(ap[i__4]) + bli_cimag(temp) * bli_creal(ap[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); --k; /* L50: */ } @@ -301,8 +301,8 @@ i__1 = j; i__2 = j; i__3 = kk - *n + j; - bli_csets( (bli_creal(x[i__2]) * bli_creal(ap[i__3]) - bli_cimag(x[i__2]) * bli_cimag(ap[i__3])), (bli_creal(x[i__2]) * bli_cimag(ap[i__3]) + bli_cimag(x[i__2]) * bli_creal(ap[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); + bli_tsets( c,c, (bli_creal(x[i__2]) * bli_creal(ap[i__3]) - bli_cimag(x[i__2]) * bli_cimag(ap[i__3])), (bli_creal(x[i__2]) * bli_cimag(ap[i__3]) + bli_cimag(x[i__2]) * bli_creal(ap[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), 
(bli_cimag(q__1)), x[i__1] ); } } kk -= *n - j + 1; @@ -315,16 +315,16 @@ i__1 = jx; if (bli_creal(x[i__1]) != 0.f || bli_cimag(x[i__1]) != 0.f) { i__1 = jx; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); ix = kx; i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { i__2 = ix; i__3 = ix; i__4 = k; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__4]) - bli_cimag(temp) * bli_cimag(ap[i__4])), (bli_creal(temp) * bli_cimag(ap[i__4]) + bli_cimag(temp) * bli_creal(ap[i__4])), q__2 ); - bli_csets( (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__4]) - bli_cimag(temp) * bli_cimag(ap[i__4])), (bli_creal(temp) * bli_cimag(ap[i__4]) + bli_cimag(temp) * bli_creal(ap[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); ix -= *incx; /* L70: */ } @@ -332,8 +332,8 @@ i__1 = jx; i__2 = jx; i__3 = kk - *n + j; - bli_csets( (bli_creal(x[i__2]) * bli_creal(ap[i__3]) - bli_cimag(x[i__2]) * bli_cimag(ap[i__3])), (bli_creal(x[i__2]) * bli_cimag(ap[i__3]) + bli_cimag(x[i__2]) * bli_creal(ap[i__3])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); + bli_tsets( c,c, (bli_creal(x[i__2]) * bli_creal(ap[i__3]) - bli_cimag(x[i__2]) * bli_cimag(ap[i__3])), (bli_creal(x[i__2]) * bli_cimag(ap[i__3]) + bli_cimag(x[i__2]) * bli_creal(ap[i__3])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } } jx -= *incx; @@ -351,41 +351,41 @@ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); k = kk - 1; if (noconj) { if (nounit) { i__1 = kk; - 
bli_csets( (bli_creal(temp) * bli_creal(ap[i__1]) - bli_cimag(temp) * bli_cimag(ap[i__1])), (bli_creal(temp) * bli_cimag(ap[i__1]) + bli_cimag(temp) * bli_creal(ap[i__1])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__1]) - bli_cimag(temp) * bli_cimag(ap[i__1])), (bli_creal(temp) * bli_cimag(ap[i__1]) + bli_cimag(temp) * bli_creal(ap[i__1])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } for (i__ = j - 1; i__ >= 1; --i__) { i__1 = k; i__2 = i__; - bli_csets( (bli_creal(ap[i__1]) * bli_creal(x[i__2]) - bli_cimag(ap[i__1]) * bli_cimag(x[i__2])), (bli_creal(ap[i__1]) * bli_cimag(x[i__2]) + bli_cimag(ap[i__1]) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(ap[i__1]) * bli_creal(x[i__2]) - bli_cimag(ap[i__1]) * bli_cimag(x[i__2])), (bli_creal(ap[i__1]) * bli_cimag(x[i__2]) + bli_cimag(ap[i__1]) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); --k; /* L90: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &ap[kk]); - bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } for (i__ = j - 1; i__ >= 1; --i__) { bla_r_cnjg(&q__3, &ap[k]); i__1 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * 
bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); --k; /* L100: */ } } i__1 = j; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); kk -= j; /* L110: */ } @@ -393,43 +393,43 @@ jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { i__1 = jx; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); ix = jx; if (noconj) { if (nounit) { i__1 = kk; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__1]) - bli_cimag(temp) * bli_cimag(ap[i__1])), (bli_creal(temp) * bli_cimag(ap[i__1]) + bli_cimag(temp) * bli_creal(ap[i__1])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__1]) - bli_cimag(temp) * bli_cimag(ap[i__1])), (bli_creal(temp) * bli_cimag(ap[i__1]) + bli_cimag(temp) * bli_creal(ap[i__1])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; i__2 = k; i__3 = ix; - bli_csets( (bli_creal(ap[i__2]) * bli_creal(x[i__3]) - bli_cimag(ap[i__2]) * bli_cimag(x[i__3])), (bli_creal(ap[i__2]) * bli_cimag(x[i__3]) + bli_cimag(ap[i__2]) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(ap[i__2]) * 
bli_creal(x[i__3]) - bli_cimag(ap[i__2]) * bli_cimag(x[i__3])), (bli_creal(ap[i__2]) * bli_cimag(x[i__3]) + bli_cimag(ap[i__2]) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L120: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &ap[kk]); - bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; bla_r_cnjg(&q__3, &ap[k]); i__2 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L130: */ } } i__1 = jx; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); jx -= *incx; kk -= j; /* L140: */ @@ -441,43 +441,43 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, 
(bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); k = kk + 1; if (noconj) { if (nounit) { i__2 = kk; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__2]) - bli_cimag(temp) * bli_cimag(ap[i__2])), (bli_creal(temp) * bli_cimag(ap[i__2]) + bli_cimag(temp) * bli_creal(ap[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__2]) - bli_cimag(temp) * bli_cimag(ap[i__2])), (bli_creal(temp) * bli_cimag(ap[i__2]) + bli_cimag(temp) * bli_creal(ap[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = i__; - bli_csets( (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ++k; /* L150: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &ap[kk]); - bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { bla_r_cnjg(&q__3, &ap[k]); 
i__3 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ++k; /* L160: */ } } i__2 = j; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); kk += *n - j + 1; /* L170: */ } @@ -486,43 +486,43 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); ix = jx; if (noconj) { if (nounit) { i__2 = kk; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__2]) - bli_cimag(temp) * bli_cimag(ap[i__2])), (bli_creal(temp) * bli_cimag(ap[i__2]) + bli_cimag(temp) * bli_creal(ap[i__2])), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__2]) - bli_cimag(temp) * bli_cimag(ap[i__2])), (bli_creal(temp) * bli_cimag(ap[i__2]) + bli_cimag(temp) * bli_creal(ap[i__2])), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; i__3 = k; i__4 = ix; - bli_csets( (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + 
bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L180: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &ap[kk]); - bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; bla_r_cnjg(&q__3, &ap[k]); i__3 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L190: */ } } i__2 = jx; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); jx += *incx; kk += *n - j + 1; /* L200: */ @@ -1417,16 +1417,16 @@ i__2 = j; 
if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { i__2 = j; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); - bli_zsets( (bli_zreal(x[i__4]) + bli_zreal(z__2)), (bli_zimag(x[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) + bli_zreal(z__2)), (bli_zimag(x[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); ++k; /* L10: */ } @@ -1434,8 +1434,8 @@ i__2 = j; i__3 = j; i__4 = kk + j - 1; - bli_zsets( (bli_zreal(x[i__3]) * bli_zreal(ap[i__4]) - bli_zimag(x[i__3]) * bli_zimag(ap[i__4])), (bli_zreal(x[i__3]) * bli_zimag(ap[i__4]) + bli_zimag(x[i__3]) * bli_zreal(ap[i__4])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(x[i__3]) * bli_zreal(ap[i__4]) - bli_zimag(x[i__3]) * bli_zimag(ap[i__4])), (bli_zreal(x[i__3]) * bli_zimag(ap[i__4]) + bli_zimag(x[i__3]) * bli_zreal(ap[i__4])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } } kk += j; @@ -1448,16 +1448,16 @@ i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ i__2 = jx; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = ix; i__4 = ix; i__5 = k; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); - bli_zsets( (bli_zreal(x[i__4]) + bli_zreal(z__2)), (bli_zimag(x[i__4]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) + bli_zreal(z__2)), (bli_zimag(x[i__4]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); ix += *incx; /* L30: */ } @@ -1465,8 +1465,8 @@ i__2 = jx; i__3 = jx; i__4 = kk + j - 1; - bli_zsets( (bli_zreal(x[i__3]) * bli_zreal(ap[i__4]) - bli_zimag(x[i__3]) * bli_zimag(ap[i__4])), (bli_zreal(x[i__3]) * bli_zimag(ap[i__4]) + bli_zimag(x[i__3]) * bli_zreal(ap[i__4])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(x[i__3]) * bli_zreal(ap[i__4]) - bli_zimag(x[i__3]) * bli_zimag(ap[i__4])), (bli_zreal(x[i__3]) * bli_zimag(ap[i__4]) + bli_zimag(x[i__3]) * bli_zreal(ap[i__4])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } } jx += *incx; @@ -1481,16 +1481,16 @@ i__1 = j; if (bli_zreal(x[i__1]) != 0. || bli_zimag(x[i__1]) != 0.) 
{ i__1 = j; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); k = kk; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { i__2 = i__; i__3 = i__; i__4 = k; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); - bli_zsets( (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); --k; /* L50: */ } @@ -1498,8 +1498,8 @@ i__1 = j; i__2 = j; i__3 = kk - *n + j; - bli_zsets( (bli_zreal(x[i__2]) * bli_zreal(ap[i__3]) - bli_zimag(x[i__2]) * bli_zimag(ap[i__3])), (bli_zreal(x[i__2]) * bli_zimag(ap[i__3]) + bli_zimag(x[i__2]) * bli_zreal(ap[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(x[i__2]) * bli_zreal(ap[i__3]) - bli_zimag(x[i__2]) * bli_zimag(ap[i__3])), (bli_zreal(x[i__2]) * bli_zimag(ap[i__3]) + bli_zimag(x[i__2]) * bli_zreal(ap[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } } kk -= *n - j + 1; @@ -1512,16 +1512,16 @@ i__1 = jx; if (bli_zreal(x[i__1]) != 0. || bli_zimag(x[i__1]) != 0.) 
{ i__1 = jx; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); ix = kx; i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { i__2 = ix; i__3 = ix; i__4 = k; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); - bli_zsets( (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); ix -= *incx; /* L70: */ } @@ -1529,8 +1529,8 @@ i__1 = jx; i__2 = jx; i__3 = kk - *n + j; - bli_zsets( (bli_zreal(x[i__2]) * bli_zreal(ap[i__3]) - bli_zimag(x[i__2]) * bli_zimag(ap[i__3])), (bli_zreal(x[i__2]) * bli_zimag(ap[i__3]) + bli_zimag(x[i__2]) * bli_zreal(ap[i__3])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(x[i__2]) * bli_zreal(ap[i__3]) - bli_zimag(x[i__2]) * bli_zimag(ap[i__3])), (bli_zreal(x[i__2]) * bli_zimag(ap[i__3]) + bli_zimag(x[i__2]) * bli_zreal(ap[i__3])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } } jx -= *incx; @@ -1548,41 +1548,41 @@ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); k = kk - 1; if (noconj) { if (nounit) { i__1 = kk; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__1]) - bli_zimag(temp) * bli_zimag(ap[i__1])), (bli_zreal(temp) * bli_zimag(ap[i__1]) + 
bli_zimag(temp) * bli_zreal(ap[i__1])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__1]) - bli_zimag(temp) * bli_zimag(ap[i__1])), (bli_zreal(temp) * bli_zimag(ap[i__1]) + bli_zimag(temp) * bli_zreal(ap[i__1])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } for (i__ = j - 1; i__ >= 1; --i__) { i__1 = k; i__2 = i__; - bli_zsets( (bli_zreal(ap[i__1]) * bli_zreal(x[i__2]) - bli_zimag(ap[i__1]) * bli_zimag(x[i__2])), (bli_zreal(ap[i__1]) * bli_zimag(x[i__2]) + bli_zimag(ap[i__1]) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(ap[i__1]) * bli_zreal(x[i__2]) - bli_zimag(ap[i__1]) * bli_zimag(x[i__2])), (bli_zreal(ap[i__1]) * bli_zimag(x[i__2]) + bli_zimag(ap[i__1]) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); --k; /* L90: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &ap[kk]); - bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } for (i__ = j - 1; i__ >= 1; --i__) { bla_d_cnjg(&z__3, &ap[k]); i__1 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + 
bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); --k; /* L100: */ } } i__1 = j; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); kk -= j; /* L110: */ } @@ -1590,43 +1590,43 @@ jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { i__1 = jx; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); ix = jx; if (noconj) { if (nounit) { i__1 = kk; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__1]) - bli_zimag(temp) * bli_zimag(ap[i__1])), (bli_zreal(temp) * bli_zimag(ap[i__1]) + bli_zimag(temp) * bli_zreal(ap[i__1])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__1]) - bli_zimag(temp) * bli_zimag(ap[i__1])), (bli_zreal(temp) * bli_zimag(ap[i__1]) + bli_zimag(temp) * bli_zreal(ap[i__1])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; i__2 = k; i__3 = ix; - bli_zsets( (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * 
bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L120: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &ap[kk]); - bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; bla_d_cnjg(&z__3, &ap[k]); i__2 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L130: */ } } i__1 = jx; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); jx -= *incx; kk -= j; /* L140: */ @@ -1638,43 +1638,43 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); k = kk + 1; if (noconj) { if (nounit) { i__2 = kk; - bli_zsets( (bli_zreal(temp) * 
bli_zreal(ap[i__2]) - bli_zimag(temp) * bli_zimag(ap[i__2])), (bli_zreal(temp) * bli_zimag(ap[i__2]) + bli_zimag(temp) * bli_zreal(ap[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__2]) - bli_zimag(temp) * bli_zimag(ap[i__2])), (bli_zreal(temp) * bli_zimag(ap[i__2]) + bli_zimag(temp) * bli_zreal(ap[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = i__; - bli_zsets( (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ++k; /* L150: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &ap[kk]); - bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { bla_d_cnjg(&z__3, &ap[k]); i__3 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * 
bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ++k; /* L160: */ } } i__2 = j; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); kk += *n - j + 1; /* L170: */ } @@ -1683,43 +1683,43 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); ix = jx; if (noconj) { if (nounit) { i__2 = kk; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__2]) - bli_zimag(temp) * bli_zimag(ap[i__2])), (bli_zreal(temp) * bli_zimag(ap[i__2]) + bli_zimag(temp) * bli_zreal(ap[i__2])), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__2]) - bli_zimag(temp) * bli_zimag(ap[i__2])), (bli_zreal(temp) * bli_zimag(ap[i__2]) + bli_zimag(temp) * bli_zreal(ap[i__2])), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; i__3 = k; i__4 = ix; - bli_zsets( (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(ap[i__3]) * 
bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L180: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &ap[kk]); - bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; bla_d_cnjg(&z__3, &ap[k]); i__3 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L190: */ } } i__2 = jx; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); jx += *incx; kk += *n - j + 1; /* L200: */ diff --git a/frame/compat/f2c/bla_tpsv.c b/frame/compat/f2c/bla_tpsv.c index 6a4a5ab6c2..a85cc0dba3 100644 --- a/frame/compat/f2c/bla_tpsv.c +++ 
b/frame/compat/f2c/bla_tpsv.c @@ -224,18 +224,18 @@ if (nounit) { i__1 = j; bla_c_div(&q__1, &x[j], &ap[kk]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } i__1 = j; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); k = kk - 1; for (i__ = j - 1; i__ >= 1; --i__) { i__1 = i__; i__2 = i__; i__3 = k; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__3]) - bli_cimag(temp) * bli_cimag(ap[i__3])), (bli_creal(temp) * bli_cimag(ap[i__3]) + bli_cimag(temp) * bli_creal(ap[i__3])), q__2 ); - bli_csets( (bli_creal(x[i__2]) - bli_creal(q__2)), (bli_cimag(x[i__2]) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__3]) - bli_cimag(temp) * bli_cimag(ap[i__3])), (bli_creal(temp) * bli_cimag(ap[i__3]) + bli_cimag(temp) * bli_creal(ap[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__2]) - bli_creal(q__2)), (bli_cimag(x[i__2]) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); --k; /* L10: */ } @@ -251,10 +251,10 @@ if (nounit) { i__1 = jx; bla_c_div(&q__1, &x[jx], &ap[kk]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } i__1 = jx; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); ix = jx; i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { @@ -262,9 +262,9 @@ i__2 = ix; i__3 = ix; i__4 = k; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__4]) - bli_cimag(temp) * bli_cimag(ap[i__4])), (bli_creal(temp) * bli_cimag(ap[i__4]) + bli_cimag(temp) * bli_creal(ap[i__4])), q__2 ); - bli_csets( (bli_creal(x[i__3]) - bli_creal(q__2)), (bli_cimag(x[i__3]) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), 
(bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__4]) - bli_cimag(temp) * bli_cimag(ap[i__4])), (bli_creal(temp) * bli_cimag(ap[i__4]) + bli_cimag(temp) * bli_creal(ap[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__3]) - bli_creal(q__2)), (bli_cimag(x[i__3]) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); /* L30: */ } } @@ -283,19 +283,19 @@ if (nounit) { i__2 = j; bla_c_div(&q__1, &x[j], &ap[kk]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } i__2 = j; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); - bli_csets( (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); ++k; /* L50: */ } @@ -312,10 +312,10 @@ if (nounit) { i__2 = jx; bla_c_div(&q__1, &x[jx], &ap[kk]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } i__2 = jx; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); ix = jx; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; 
++k) { @@ -323,9 +323,9 @@ i__3 = ix; i__4 = ix; i__5 = k; - bli_csets( (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); - bli_csets( (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); + bli_tsets( c,c, (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); + bli_tsets( c,c, (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); /* L70: */ } } @@ -345,42 +345,42 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); k = kk; if (noconj) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = i__; - bli_csets( (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ++k; /* L90: */ } if (nounit) { bla_c_div(&q__1, &temp, &ap[kk + j - 1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } 
else { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { bla_r_cnjg(&q__3, &ap[k]); i__3 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ++k; /* L100: */ } if (nounit) { bla_r_cnjg(&q__2, &ap[kk + j - 1]); bla_c_div(&q__1, &temp, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__2 = j; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); kk += j; /* L110: */ } @@ -389,42 +389,42 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); + bli_tsets( c,c, (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); ix = kx; if (noconj) { i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = ix; - bli_csets( (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * bli_creal(x[i__4])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * 
bli_creal(x[i__4])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L120: */ } if (nounit) { bla_c_div(&q__1, &temp, &ap[kk + j - 1]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { bla_r_cnjg(&q__3, &ap[k]); i__3 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L130: */ } if (nounit) { bla_r_cnjg(&q__2, &ap[kk + j - 1]); bla_c_div(&q__1, &temp, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__2 = jx; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); jx += *incx; kk += j; /* L140: */ @@ -435,42 +435,42 @@ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); k = kk; if (noconj) { i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { i__2 = k; i__3 = i__; - bli_csets( (bli_creal(ap[i__2]) * bli_creal(x[i__3]) - bli_cimag(ap[i__2]) * bli_cimag(x[i__3])), 
(bli_creal(ap[i__2]) * bli_cimag(x[i__3]) + bli_cimag(ap[i__2]) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(ap[i__2]) * bli_creal(x[i__3]) - bli_cimag(ap[i__2]) * bli_cimag(x[i__3])), (bli_creal(ap[i__2]) * bli_cimag(x[i__3]) + bli_cimag(ap[i__2]) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); --k; /* L150: */ } if (nounit) { bla_c_div(&q__1, &temp, &ap[kk - *n + j]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { bla_r_cnjg(&q__3, &ap[k]); i__2 = i__; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); --k; /* L160: */ } if (nounit) { bla_r_cnjg(&q__2, &ap[kk - *n + j]); bla_c_div(&q__1, &temp, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__1 = j; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); kk -= *n - j + 1; /* L170: */ } @@ 
-479,42 +479,42 @@ jx = kx; for (j = *n; j >= 1; --j) { i__1 = jx; - bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); + bli_tsets( c,c, (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); ix = kx; if (noconj) { i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { i__2 = k; i__3 = ix; - bli_csets( (bli_creal(ap[i__2]) * bli_creal(x[i__3]) - bli_cimag(ap[i__2]) * bli_cimag(x[i__3])), (bli_creal(ap[i__2]) * bli_cimag(x[i__3]) + bli_cimag(ap[i__2]) * bli_creal(x[i__3])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(ap[i__2]) * bli_creal(x[i__3]) - bli_cimag(ap[i__2]) * bli_cimag(x[i__3])), (bli_creal(ap[i__2]) * bli_cimag(x[i__3]) + bli_cimag(ap[i__2]) * bli_creal(x[i__3])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L180: */ } if (nounit) { bla_c_div(&q__1, &temp, &ap[kk - *n + j]); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { bla_r_cnjg(&q__3, &ap[k]); i__2 = ix; - bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); - bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); + bli_tsets( c,c, (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); + bli_tsets( c,c, (bli_creal(q__1)), 
(bli_cimag(q__1)), temp ); ix -= *incx; /* L190: */ } if (nounit) { bla_r_cnjg(&q__2, &ap[kk - *n + j]); bla_c_div(&q__1, &temp, &q__2); - bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); + bli_tsets( c,c, (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__1 = jx; - bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); + bli_tsets( c,c, (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); jx -= *incx; kk -= *n - j + 1; /* L200: */ @@ -1420,18 +1420,18 @@ if (nounit) { i__1 = j; bla_z_div(&z__1, &x[j], &ap[kk]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } i__1 = j; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); k = kk - 1; for (i__ = j - 1; i__ >= 1; --i__) { i__1 = i__; i__2 = i__; i__3 = k; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__3]) - bli_zimag(temp) * bli_zimag(ap[i__3])), (bli_zreal(temp) * bli_zimag(ap[i__3]) + bli_zimag(temp) * bli_zreal(ap[i__3])), z__2 ); - bli_zsets( (bli_zreal(x[i__2]) - bli_zreal(z__2)), (bli_zimag(x[i__2]) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__3]) - bli_zimag(temp) * bli_zimag(ap[i__3])), (bli_zreal(temp) * bli_zimag(ap[i__3]) + bli_zimag(temp) * bli_zreal(ap[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__2]) - bli_zreal(z__2)), (bli_zimag(x[i__2]) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); --k; /* L10: */ } @@ -1447,10 +1447,10 @@ if (nounit) { i__1 = jx; bla_z_div(&z__1, &x[jx], &ap[kk]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } i__1 = jx; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); ix = jx; i__1 = kk - j + 
1; for (k = kk - 1; k >= i__1; --k) { @@ -1458,9 +1458,9 @@ i__2 = ix; i__3 = ix; i__4 = k; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); - bli_zsets( (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); /* L30: */ } } @@ -1479,19 +1479,19 @@ if (nounit) { i__2 = j; bla_z_div(&z__1, &x[j], &ap[kk]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } i__2 = j; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); - bli_zsets( (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); ++k; /* L50: */ } @@ 
-1508,10 +1508,10 @@ if (nounit) { i__2 = jx; bla_z_div(&z__1, &x[jx], &ap[kk]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } i__2 = jx; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); ix = jx; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { @@ -1519,9 +1519,9 @@ i__3 = ix; i__4 = ix; i__5 = k; - bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); - bli_zsets( (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); + bli_tsets( z,z, (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); + bli_tsets( z,z, (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); /* L70: */ } } @@ -1541,42 +1541,42 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); k = kk; if (noconj) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = i__; - bli_zsets( (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + 
bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ++k; /* L90: */ } if (nounit) { bla_z_div(&z__1, &temp, &ap[kk + j - 1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { bla_d_cnjg(&z__3, &ap[k]); i__3 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ++k; /* L100: */ } if (nounit) { bla_d_cnjg(&z__2, &ap[kk + j - 1]); bla_z_div(&z__1, &temp, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__2 = j; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); kk += j; /* L110: */ } @@ -1585,42 +1585,42 @@ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); ix = kx; if (noconj) { i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = ix; - bli_zsets( (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), 
(bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L120: */ } if (nounit) { bla_z_div(&z__1, &temp, &ap[kk + j - 1]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { bla_d_cnjg(&z__3, &ap[k]); i__3 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L130: */ } if (nounit) { bla_d_cnjg(&z__2, &ap[kk + j - 1]); bla_z_div(&z__1, &temp, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__2 = jx; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); jx += *incx; kk += j; /* 
L140: */ @@ -1631,42 +1631,42 @@ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); k = kk; if (noconj) { i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { i__2 = k; i__3 = i__; - bli_zsets( (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); --k; /* L150: */ } if (nounit) { bla_z_div(&z__1, &temp, &ap[kk - *n + j]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { bla_d_cnjg(&z__3, &ap[k]); i__2 = i__; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), 
(bli_zimag(z__1)), temp ); --k; /* L160: */ } if (nounit) { bla_d_cnjg(&z__2, &ap[kk - *n + j]); bla_z_div(&z__1, &temp, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__1 = j; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); kk -= *n - j + 1; /* L170: */ } @@ -1675,42 +1675,42 @@ jx = kx; for (j = *n; j >= 1; --j) { i__1 = jx; - bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); + bli_tsets( z,z, (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); ix = kx; if (noconj) { i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { i__2 = k; i__3 = ix; - bli_zsets( (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * bli_zreal(x[i__3])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * bli_zreal(x[i__3])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L180: */ } if (nounit) { bla_z_div(&z__1, &temp, &ap[kk - *n + j]); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { bla_d_cnjg(&z__3, &ap[k]); i__2 = ix; - bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); - bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), 
(bli_zimag(temp) - bli_zimag(z__2)), z__1 ); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); + bli_tsets( z,z, (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L190: */ } if (nounit) { bla_d_cnjg(&z__2, &ap[kk - *n + j]); bla_z_div(&z__1, &temp, &z__2); - bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); + bli_tsets( z,z, (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__1 = jx; - bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); + bli_tsets( z,z, (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); jx -= *incx; kk -= *n - j + 1; /* L200: */ diff --git a/frame/compat/f2c/util/bla_c_div.c b/frame/compat/f2c/util/bla_c_div.c index 975f49b0a4..c991f7bc13 100644 --- a/frame/compat/f2c/util/bla_c_div.c +++ b/frame/compat/f2c/util/bla_c_div.c @@ -38,8 +38,8 @@ void bla_c_div(bla_scomplex *cp, const bla_scomplex *ap, const bla_scomplex *bp) { - bli_ccopys( *ap, *cp ); - bli_cinvscals( *bp, *cp ); + bli_tcopys( c,c, *ap, *cp ); + bli_tinvscals( c,c,c, *bp, *cp ); } #endif diff --git a/frame/compat/f2c/util/bla_d_cnjg.c b/frame/compat/f2c/util/bla_d_cnjg.c index 43dc9758c3..f6df81cf2f 100644 --- a/frame/compat/f2c/util/bla_d_cnjg.c +++ b/frame/compat/f2c/util/bla_d_cnjg.c @@ -38,7 +38,7 @@ void bla_d_cnjg(bla_dcomplex *dest, const bla_dcomplex *src) { - bli_zcopyjs( *src, *dest ); + bli_tcopyjs( z,z, *src, *dest ); } #endif diff --git a/frame/compat/f2c/util/bla_r_cnjg.c b/frame/compat/f2c/util/bla_r_cnjg.c index 42b25d5757..497dcfa38b 100644 --- a/frame/compat/f2c/util/bla_r_cnjg.c +++ b/frame/compat/f2c/util/bla_r_cnjg.c @@ -38,7 +38,7 @@ void bla_r_cnjg(bla_scomplex *dest, const bla_scomplex *src) { - bli_ccopyjs( *src, *dest ); + bli_tcopyjs( c,c, 
*src, *dest ); } #endif diff --git a/frame/compat/f2c/util/bla_z_div.c b/frame/compat/f2c/util/bla_z_div.c index 3d36a8ac89..80cf30fbdd 100644 --- a/frame/compat/f2c/util/bla_z_div.c +++ b/frame/compat/f2c/util/bla_z_div.c @@ -38,8 +38,8 @@ void bla_z_div(bla_dcomplex *cp, const bla_dcomplex *ap, const bla_dcomplex *bp) { - bli_zcopys( *ap, *cp ); - bli_zinvscals( *bp, *cp ); + bli_tcopys( z,z, *ap, *cp ); + bli_tinvscals( z,z,z, *bp, *cp ); } #endif diff --git a/frame/include/bli_cast_macro_defs.h b/frame/include/bli_cast_macro_defs.h new file mode 100644 index 0000000000..3033276ff6 --- /dev/null +++ b/frame/include/bli_cast_macro_defs.h @@ -0,0 +1,529 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_CAST_MACRO_DEFS_H +#define BLIS_CAST_MACRO_DEFS_H + +// -- Typecast { bfloat16 | float | double } to bfloat16 ----------------------- + +#ifdef BFLOAT +BLIS_INLINE bfloat bli_bbcast( bfloat b ) +{ + return b; +} +#endif + +#ifdef BFLOAT +BLIS_INLINE bfloat bli_sbcast( float s ) +{ + bfloat b; + + // View the float as a char array. + char* s_ch = ( char* )&s; + + // Copy upper two bytes of float to a local bfloat16. + memcpy( &b, &s_ch[2], 2 ); + + return b; +} +#endif + +#ifdef BFLOAT +BLIS_INLINE bfloat bli_dbcast( double d ) +{ + bfloat b; + + // Typecast double input argument to a local float. + float s = ( float )d; + + // View the float as a char array. + char* s_ch = ( char* )&s; + + // Copy upper two bytes of float to a local bfloat16. + memcpy( &b, &s_ch[2], 2 ); + + return b; +} +#endif + +// -- Typecast { bfloat16 | float | double | int } to float -------------------------- + +#ifdef BFLOAT +BLIS_INLINE float bli_bscast( bfloat b ) +{ + // Initialize all bits in a local float to zero. + float s = 0.0F; + + // View the float as a char array. + char* s_ch = ( char* )&s; + + // Copy bfloat16 to the upper two bytes of a local float. 
+ memcpy( &s_ch[2], &b, 2 ); + + return s; +} +#endif + +BLIS_INLINE float bli_sscast( float s ) +{ + return s; +} + +BLIS_INLINE float bli_dscast( double d ) +{ + return ( float )d; +} + +BLIS_INLINE float bli_iscast( dim_t i ) +{ + return ( float )i; +} + +// -- Typecast { bfloat16 | float | double | int } to double ------------------------- + +#ifdef BFLOAT +BLIS_INLINE double bli_bdcast( bfloat b ) +{ + // Initialize all bits in a local float to zero. + float s = 0.0F; + + // View the float as a char array. + char* s_ch = ( char* )&s; + + // Copy bfloat16 to the upper two bytes of a local float. + memcpy( &s_ch[2], &b, 2 ); + + return ( double )s; +} +#endif + +BLIS_INLINE double bli_sdcast( float s ) +{ + return ( double )s; +} + +//#if 1 +BLIS_INLINE double bli_ddcast( double d ) +{ + return d; +} +//#else +//#define bli_ddcast( d ) ( d ) +//#endif + +BLIS_INLINE double bli_idcast( dim_t i ) +{ + return ( double )i; +} + +// -- Typecast { float | double | int } to int ------------------------- + +BLIS_INLINE dim_t bli_sicast( float s ) +{ + return ( dim_t )s; +} + +BLIS_INLINE dim_t bli_dicast( double d ) +{ + return ( dim_t )d; +} + +BLIS_INLINE dim_t bli_iicast( dim_t i ) +{ + return i; +} + +#if 0 +// -- Fused real/imag accessor + typecast -------------------------------------- + +// Generate static functions that fuse two operations: +// - accessing the real and imaginary components of all datatypes (real +// and complex) +// - typecasting a real (or imaginary) component to any real datatype +// Examples: +// static float bli_dreals( double a ) { return bli_dscast( bli_dreal( a ) ); } +// static double bli_sreald( float a ) { return bli_sdcast( bli_sreal( a ) ); } +// static float bli_creals( scomplex a ) { return bli_sscast( bli_creal( a ) ); } +// static double bli_cimagd( scomplex a ) { return bli_sdcast( bli_cimag( a ) ); } + +#undef GENTFUNC +#define GENTFUNC( chi, cho ) \ +\ +BLIS_INLINE PASTEMAC(cho,ctype) PASTEMAC2(chi,real,cho)( 
PASTEMAC(chi,ctype) a ) \ +{ \ + return PASTEMAC2(chi,cho,cast)( PASTEMAC(chi,real)( a ) ); \ +} \ +BLIS_INLINE PASTEMAC(cho,ctype) PASTEMAC2(chi,imag,cho)( PASTEMAC(chi,ctype) a ) \ +{ \ + return PASTEMAC2(chi,cho,cast)( PASTEMAC(chi,imag)( a ) ); \ +} + +// NOTE: We only have to generate functions that output to types [bsd] because +// these macros only need to output real types. The composition that allows +// complex types will be handled by the consumers of these bli_?[real|imag]?() +// functions. + +// [bsdkcz][bsd] + +GENTFUNC( b, b ) +GENTFUNC( s, b ) +GENTFUNC( d, b ) +GENTFUNC( k, b ) +GENTFUNC( c, b ) +GENTFUNC( z, b ) + +GENTFUNC( b, s ) +GENTFUNC( s, s ) +GENTFUNC( d, s ) +GENTFUNC( k, s ) +GENTFUNC( c, s ) +GENTFUNC( z, s ) + +GENTFUNC( b, d ) +GENTFUNC( s, d ) +GENTFUNC( d, d ) +GENTFUNC( k, d ) +GENTFUNC( c, d ) +GENTFUNC( z, d ) +#endif + +// bli_xytcast() macros are only used in the definitions of level0 scalar +// macros. There, we use a different name from the actual cast functions-- +// which are named using the format bli_xycast()--so that we can optionally +// replace them as part of the optimization below without disturbing any +// other uses of bli_xycast() that should not be changed. + +#define bli_bbtcast bli_bbcast +#define bli_sbtcast bli_sbcast +#define bli_dbtcast bli_dbcast +#define bli_kbtcast bli_kbcast +#define bli_cbtcast bli_cbcast +#define bli_zbtcast bli_zbcast + +#define bli_bstcast bli_bscast +#define bli_sstcast bli_sscast +#define bli_dstcast bli_dscast +#define bli_kstcast bli_kscast +#define bli_cstcast bli_cscast +#define bli_zstcast bli_zscast +#define bli_istcast bli_iscast + +#define bli_bdtcast bli_bdcast +#define bli_sdtcast bli_sdcast +#define bli_ddtcast bli_ddcast +#define bli_kdtcast bli_kdcast +#define bli_cdtcast bli_cdcast +#define bli_zdtcast bli_zdcast +#define bli_idtcast bli_idcast + +#define bli_sitcast bli_sicast +#define bli_ditcast bli_dicast +#define bli_iitcast bli_iicast + +// An optimization.
In situations where computations would normally occur +// in bfloat, redundant typecasting may occur. For example, in the case of +// performing ssbbaxpy (a and x stored in type s; y stored in type b; +// compute in b), a and x would normally be typecast to b so that all +// operands are in the computation precision (namely, bfloat), but since +// our reference implementation implements bfloat flops in terms of float +// flops, all operands would need to be typecast back to s anyway just so +// the computation can take place. This means that a and x were truncated +// down to bfloat (and thus lost precision) somewhat unnecessarily. Instead, +// what could happen is that a and x remain in s, y is typecast to s, +// computation would take place in s, and then the result is truncated to +// bfloat on output to y. These macros substitute certain static function +// calls to be the equivalent calls that would cast to float instead of +// bfloat. +#ifdef BLIS_OPTIMIZE_BFLOAT_AS_FLOAT + +#undef bli_bbtcast +#define bli_bbtcast bli_bscast +#undef bli_sbtcast +#define bli_sbtcast bli_sscast +#undef bli_dbtcast +#define bli_dbtcast bli_dscast +#undef bli_kbtcast +#define bli_kbtcast bli_kscast +#undef bli_cbtcast +#define bli_cbtcast bli_cscast +#undef bli_zbtcast +#define bli_zbtcast bli_zscast + +#endif + + +// -- Basic constants (per precision) ------------------------------------------ + +#ifdef BLIS_OPTIMIZE_BFLOAT_AS_FLOAT + +#define bli_btwo bli_stwo +#define bli_bone bli_sone +#define bli_bzero bli_szero +#define bli_bmone bli_smone +#define bli_bmtwo bli_smtwo + +#else + +#define bli_btwo bli_sbcast( bli_stwo ) +#define bli_bone bli_sbcast( bli_sone ) +#define bli_bzero bli_sbcast( bli_szero ) +#define bli_bmone bli_sbcast( bli_smone ) +#define bli_bmtwo bli_sbcast( bli_smtwo ) + +#endif + +#define bli_stwo 2.0F +#define bli_sone 1.0F +#define bli_szero 0.0F +#define bli_smone -1.0F +#define bli_smtwo -2.0F + +#define bli_dtwo 2.0 +#define bli_done 1.0 +#define bli_dzero 0.0
+#define bli_dmone -1.0 +#define bli_dmtwo -2.0 + +// -- Basic arithmetic operations (per precision) ------------------------------ + +// NOTE: The s and d expansions below are fully parenthesized so that they +// compose safely when used inside a larger expression. + +#ifdef BLIS_OPTIMIZE_BFLOAT_AS_FLOAT + +#define bli_bmul( a, b ) bli_smul( a, b ) +#define bli_bdiv( a, b ) bli_sdiv( a, b ) +#define bli_badd( a, b ) bli_sadd( a, b ) +#define bli_bsub( a, b ) bli_ssub( a, b ) +#define bli_bneg( a ) bli_sneg( a ) +#define bli_bsqrt( a ) bli_ssqrt( a ) +#define bli_bhypot( a, b ) bli_shypot( a, b ) + +#else + +#define bli_bmul( a, b ) bli_sbcast( bli_smul( bli_bscast(a), bli_bscast(b) ) ) +#define bli_bdiv( a, b ) bli_sbcast( bli_sdiv( bli_bscast(a), bli_bscast(b) ) ) +#define bli_badd( a, b ) bli_sbcast( bli_sadd( bli_bscast(a), bli_bscast(b) ) ) +#define bli_bsub( a, b ) bli_sbcast( bli_ssub( bli_bscast(a), bli_bscast(b) ) ) +#define bli_bneg( a ) bli_sbcast( bli_sneg( bli_bscast(a) ) ) +#define bli_bsqrt( a ) bli_sbcast( bli_ssqrt( bli_bscast(a) ) ) +#define bli_bhypot( a, b ) bli_sbcast( bli_shypot( bli_bscast(a), bli_bscast(b) ) ) + +#endif + +#define bli_smul( a, b ) ( (a) * (b) ) +#define bli_sdiv( a, b ) ( (a) / (b) ) +#define bli_sadd( a, b ) ( (a) + (b) ) +#define bli_ssub( a, b ) ( (a) - (b) ) +#define bli_sneg( a ) ( -(a) ) +#define bli_ssqrt( a ) sqrtf(a) +#define bli_shypot( a, b ) hypotf(a,b) + +#define bli_dmul( a, b ) ( (a) * (b) ) +#define bli_ddiv( a, b ) ( (a) / (b) ) +#define bli_dadd( a, b ) ( (a) + (b) ) +#define bli_dsub( a, b ) ( (a) - (b) ) +#define bli_dneg( a ) ( -(a) ) +#define bli_dsqrt( a ) sqrt(a) +#define bli_dhypot( a, b ) hypot(a,b) + +// -- Basic compare operations (per precision) --------------------------------- + +// NOTE: A comparison yields the int result of the underlying float +// comparison; it is deliberately not cast back to bfloat. + +#ifdef BLIS_OPTIMIZE_BFLOAT_AS_FLOAT + +#define bli_beq( a, b ) bli_seq( a, b ) +#define bli_blt( a, b ) bli_slt( a, b ) +#define bli_ble( a, b ) bli_sle( a, b ) +#define bli_bgt( a, b ) bli_sgt( a, b ) +#define bli_bge( a, b ) bli_sge( a, b ) + +#else + +#define bli_beq( a, b ) bli_seq( bli_bscast(a), bli_bscast(b) ) +#define bli_blt( a, b ) bli_slt(
bli_bscast(a), bli_bscast(b) ) +#define bli_ble( a, b ) bli_sle( bli_bscast(a), bli_bscast(b) ) +#define bli_bgt( a, b ) bli_sgt( bli_bscast(a), bli_bscast(b) ) +#define bli_bge( a, b ) bli_sge( bli_bscast(a), bli_bscast(b) ) + +#endif + +#define bli_seq( a, b ) ( (a) == (b) ) +#define bli_slt( a, b ) ( (a) < (b) ) +#define bli_sle( a, b ) ( (a) <= (b) ) +#define bli_sgt( a, b ) ( (a) > (b) ) +#define bli_sge( a, b ) ( (a) >= (b) ) + +#define bli_deq( a, b ) ( (a) == (b) ) +#define bli_dlt( a, b ) ( (a) < (b) ) +#define bli_dle( a, b ) ( (a) <= (b) ) +#define bli_dgt( a, b ) ( (a) > (b) ) +#define bli_dge( a, b ) ( (a) >= (b) ) + +#define bli_ieq( a, b ) ( (a) == (b) ) +#define bli_ilt( a, b ) ( (a) < (b) ) +#define bli_ile( a, b ) ( (a) <= (b) ) +#define bli_igt( a, b ) ( (a) > (b) ) +#define bli_ige( a, b ) ( (a) >= (b) ) + +// -- Min/max/abs/etc. operations (per precision) ------------------------------ + +// NOTE: The min/max macros evaluate their arguments more than once, so +// arguments must be free of side effects. + +#ifdef BLIS_OPTIMIZE_BFLOAT_AS_FLOAT + +#define bli_bmin( a, b ) bli_smin( a, b ) +#define bli_bmax( a, b ) bli_smax( a, b ) +#define bli_babs( a ) bli_sabs( a ) +#define bli_bminabs( a, b ) bli_sminabs( a, b ) +#define bli_bmaxabs( a, b ) bli_smaxabs( a, b ) +#define bli_bcopysign( a, b ) ( bli_slt( b , bli_szero ) \ + ? bli_sneg( bli_sabs( a ) ) \ + : bli_sabs( a ) ) + +#else + +#define bli_bmin( a, b ) bli_sbcast( bli_smin( bli_bscast(a), bli_bscast(b) ) ) +#define bli_bmax( a, b ) bli_sbcast( bli_smax( bli_bscast(a), bli_bscast(b) ) ) +#define bli_babs( a ) bli_sbcast( bli_sabs( bli_bscast(a) ) ) +#define bli_bminabs( a, b ) bli_sbcast( bli_sminabs( bli_bscast(a), bli_bscast(b) ) ) +#define bli_bmaxabs( a, b ) bli_sbcast( bli_smaxabs( bli_bscast(a), bli_bscast(b) ) ) +#define bli_bcopysign( a, b ) bli_sbcast( bli_slt( bli_bscast(b), bli_szero ) \ + ? bli_sneg( bli_sabs( bli_bscast(a) ) ) \ + : bli_sabs( bli_bscast(a) ) ) + +#endif + +#define bli_smin( a, b ) ( bli_slt( a, b ) ? (a) : (b) ) +#define bli_smax( a, b ) ( bli_sgt( a, b ) ?
(a) : (b) ) +//#define bli_sabs( a ) ( bli_slt( a, PASTEMAC(s,zero) ) ? -(a) : a ) +#define bli_sabs( a ) ( fabsf(a) ) +#define bli_sminabs( a, b ) bli_smin( bli_sabs( a ), bli_sabs( b ) ) +#define bli_smaxabs( a, b ) bli_smax( bli_sabs( a ), bli_sabs( b ) ) +#define bli_scopysign( a, b ) ( copysignf( a, b ) ) + +#define bli_dmin( a, b ) ( bli_dlt( a, b ) ? (a) : (b) ) +#define bli_dmax( a, b ) ( bli_dgt( a, b ) ? (a) : (b) ) +//#define bli_dabs( a ) ( bli_dlt( a, PASTEMAC(d,zero) ) ? -(a) : a ) +#define bli_dabs( a ) ( fabs(a) ) +#define bli_dminabs( a, b ) bli_dmin( bli_dabs( a ), bli_dabs( b ) ) +#define bli_dmaxabs( a, b ) bli_dmax( bli_dabs( a ), bli_dabs( b ) ) +#define bli_dcopysign( a, b ) ( copysign( a, b ) ) + +// -- Infinity/NaN check (per precision) --------------------------------------- + +#ifdef BLIS_OPTIMIZE_BFLOAT_AS_FLOAT + +#define bli_bisinf( a ) bli_sisinf( a ) +#define bli_bisnan( a ) bli_sisnan( a ) + +#else + +#define bli_bisinf( a ) bli_sisinf( bli_bscast(a) ) +#define bli_bisnan( a ) bli_sisnan( bli_bscast(a) ) + +#endif + +#define bli_sisinf( a ) isinf( a ) +#define bli_sisnan( a ) isnan( a ) + +#define bli_disinf( a ) isinf( a ) +#define bli_disnan( a ) isnan( a ) + +// -- Randomization operations (per precision) --------------------------------- + +#define bli_brand bli_dbcast( bli_rand() ) +#define bli_srand bli_dscast( bli_rand() ) +#define bli_drand bli_ddcast( bli_rand() ) + +// Randomize a real number on the interval [-1.0,1.0] and return it as a double. +BLIS_INLINE double bli_rand( void ) +{ + return ( ( ( double ) rand() ) / + ( ( double ) RAND_MAX / 2.0 ) + ) - 1.0; +} + +#define bli_brandnp2 bli_dbcast( bli_randnp2s() ) +#define bli_srandnp2 bli_dscast( bli_randnp2s() ) +#define bli_drandnp2 bli_ddcast( bli_randnp2s() ) + +// Randomize a power of two on a narrow range and return it as a double.
+BLIS_INLINE double bli_randnp2s( void ) +{ + const double m_max = 6.0; + const double m_max2 = m_max + 2.0; + double t; + double r_val; + + // Compute a narrow-range power of two. + // + // For the purposes of commentary, we'll assume that m_max = 4. This + // represents the largest power of two we will use to generate the + // random numbers. + // + // NOTE: m_max is actually set to 6.0 above, so the intervals and sets + // quoted in the comments below are illustrative (they correspond to + // m_max = 4) rather than the values used at run time. + + do + { + // Generate a random real number t on the interval: [0.0, 6.0]. + t = ( ( double ) rand() / ( double ) RAND_MAX ) * m_max2; + + // Transform the interval into the set of integers, {0,1,2,3,4,5}. + // Note that 6 is prohibited by the loop guard below. + t = floor( t ); + } + // If t is ever equal to m_max2, we re-randomize. The guard against + // m_max2 < t is for sanity and shouldn't happen, unless perhaps there + // is weirdness in the typecasting to double when computing t above. + while ( m_max2 <= t ); + + // Map values of t == 0 to a final value of 0. + if ( t == 0.0 ) r_val = 0.0; + else + { + // This case handles values of t = {1,2,3,4,5}. + + // Compute r_val = 2^s where s = -(t-1) = {-4,-3,-2,-1,0}. + r_val = pow( 2.0, -(t - 1.0) ); + + // Compute a random number to determine the sign of the final + // result. + const double s_val = PASTEMAC(d,rand); + + // If our sign value is negative, our random power of two will + // be negative. + if ( s_val < 0.0 ) r_val = -r_val; + } + + // r_val = 0, or +/-{2^0, 2^-1, 2^-2, 2^-3, 2^-4}.
+ return r_val; +} + + + +#endif + diff --git a/frame/include/bli_complex_macro_defs.h b/frame/include/bli_complex_macro_defs.h index f9e22ef0a8..6b2e4a27e4 100644 --- a/frame/include/bli_complex_macro_defs.h +++ b/frame/include/bli_complex_macro_defs.h @@ -43,6 +43,8 @@ #define bli_simag( x ) ( 0.0F ) #define bli_dreal( x ) ( x ) #define bli_dimag( x ) ( 0.0 ) +#define bli_ireal( x ) ( x ) +#define bli_iimag( x ) ( 0 ) #if defined(__cplusplus) && defined(BLIS_ENABLE_STD_COMPLEX) diff --git a/frame/include/bli_edge_case_macro_defs.h b/frame/include/bli_edge_case_macro_defs.h index ad72e75147..bef0927472 100644 --- a/frame/include/bli_edge_case_macro_defs.h +++ b/frame/include/bli_edge_case_macro_defs.h @@ -56,7 +56,7 @@ #define GEMM_UKR_SETUP_CT_POST(ch) \ \ PASTEMAC(ch,ctype) _zero; \ - PASTEMAC(ch,set0s)( _zero ); \ + bli_tset0s( ch, _zero ); \ \ if ( _use_ct ) \ { \ @@ -117,8 +117,9 @@ microtile. */ \ if ( _use_ct ) \ { \ - PASTEMAC(ch,xpbys_mxn) \ + bli_txpbys_mxn \ ( \ + ch,ch,ch,ch, \ m, n, \ _ct, _rs_ct, _cs_ct, \ _beta, \ diff --git a/frame/include/bli_genarray_macro_defs.h b/frame/include/bli_genarray_macro_defs.h index 4ec89d9485..52e59b658a 100644 --- a/frame/include/bli_genarray_macro_defs.h +++ b/frame/include/bli_genarray_macro_defs.h @@ -50,6 +50,16 @@ static tname PASTECH(opname,_fpa)[BLIS_NUM_FP_TYPES] = \ ( tname )PASTEMAC(z,opname) \ } +#define GENARRAYRO_FPA(tname,opname) \ +\ +static tname PASTECH(opname,_fpa)[BLIS_NUM_FP_TYPES] = \ +{ \ + ( tname )PASTEMAC(s,opname), \ + ( tname )PASTEMAC(d,opname), \ + NULL, \ + NULL \ +} + // -- "Smart" one-operand macro (with integer support) -- #define GENARRAY_FPA_I(tname,opname) \ diff --git a/frame/include/bli_gentfunc_macro_defs.h b/frame/include/bli_gentfunc_macro_defs.h index 8074bb4413..aeafd37551 100644 --- a/frame/include/bli_gentfunc_macro_defs.h +++ b/frame/include/bli_gentfunc_macro_defs.h @@ -130,12 +130,12 @@ GENTFUNCR2( dcomplex, double, z, d, blasname, blisname ) #define 
INSERT_GENTFUNCSCAL_BLAS( blasname, blisname ) \ \ -GENTFUNCSCAL( float, float, s, , blasname, blisname ) \ -GENTFUNCSCAL( double, double, d, , blasname, blisname ) \ -GENTFUNCSCAL( scomplex, scomplex, c, , blasname, blisname ) \ -GENTFUNCSCAL( dcomplex, dcomplex, z, , blasname, blisname ) \ -GENTFUNCSCAL( scomplex, float, c, s, blasname, blisname ) \ -GENTFUNCSCAL( dcomplex, double, z, d, blasname, blisname ) +GENTFUNCSCAL( float, float, s, , s, blasname, blisname ) \ +GENTFUNCSCAL( double, double, d, , d, blasname, blisname ) \ +GENTFUNCSCAL( scomplex, scomplex, c, , c, blasname, blisname ) \ +GENTFUNCSCAL( dcomplex, dcomplex, z, , z, blasname, blisname ) \ +GENTFUNCSCAL( scomplex, float, c, s, s, blasname, blisname ) \ +GENTFUNCSCAL( dcomplex, double, z, d, d, blasname, blisname ) diff --git a/frame/include/bli_gentprot_macro_defs.h b/frame/include/bli_gentprot_macro_defs.h index e733e48003..e661e52351 100644 --- a/frame/include/bli_gentprot_macro_defs.h +++ b/frame/include/bli_gentprot_macro_defs.h @@ -165,6 +165,15 @@ GENTPROTR( dcomplex, double, z, d, __VA_ARGS__ ) +// -- Basic one-operand macro with real domain only -- + +#define INSERT_GENTPROTRO_BASIC( ... ) \ +\ +GENTPROTRO( float, s, __VA_ARGS__ ) \ +GENTPROTRO( double, d, __VA_ARGS__ ) + + + // -- Basic one-operand macro with complex domain only and real projection -- #define INSERT_GENTPROTCO_BASIC( ... 
) \ diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index 8af3f5a266..cd4d619e39 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -97,6 +97,7 @@ #include "bli_gentconf_macro_defs.h" #include "bli_misc_macro_defs.h" +#include "bli_cast_macro_defs.h" #include "bli_edge_case_macro_defs.h" #include "bli_param_macro_defs.h" #include "bli_complex_macro_defs.h" diff --git a/frame/include/bli_misc_macro_defs.h b/frame/include/bli_misc_macro_defs.h index 98d86a2988..0d41ee9255 100644 --- a/frame/include/bli_misc_macro_defs.h +++ b/frame/include/bli_misc_macro_defs.h @@ -84,14 +84,6 @@ BLIS_INLINE guint_t bli_round_to_mult( guint_t val, guint_t mult ) ); } -// isnan, isinf -// NOTE: These must remain macros, since isinf() and isnan() are macros -// (defined in math.h) that likely depend on the type of the argument 'a' -// below. - -#define bli_isinf( a ) isinf( a ) -#define bli_isnan( a ) isnan( a ) - // is_odd, is_even BLIS_INLINE bool bli_is_odd( gint_t a ) @@ -130,27 +122,50 @@ BLIS_INLINE void bli_toggle_bool( bool* b ) else *b = TRUE; } -// return datatype for char +// return datatype for datatype char #define bli_stype ( BLIS_FLOAT ) #define bli_dtype ( BLIS_DOUBLE ) #define bli_ctype ( BLIS_SCOMPLEX ) #define bli_ztype ( BLIS_DCOMPLEX ) -// return C type for char +// return C type for datatype char #define bli_sctype float #define bli_dctype double #define bli_cctype scomplex #define bli_zctype dcomplex -// return real proj of C type for char +// return C type for domain and precision chars + +#define bli_rsctype float +#define bli_rdctype double +#define bli_csctype scomplex +#define bli_cdctype dcomplex + +// return real proj of C type for datatype char #define bli_sctyper float #define bli_dctyper double #define bli_cctyper float #define bli_zctyper double +// return precision component of dt char + +#define bli_sprec s +#define bli_dprec d +#define bli_cprec s +#define bli_zprec d +#define bli_iprec i 
+ +// return domain component of dt char + +#define bli_sdom r +#define bli_ddom r +#define bli_cdom c +#define bli_zdom c +#define bli_idom r + // return whether or not two types are the same #define bli_sssame 1 diff --git a/frame/include/bli_scalar_macro_defs.h b/frame/include/bli_scalar_macro_defs.h index 2eea517fdd..2d4798a101 100644 --- a/frame/include/bli_scalar_macro_defs.h +++ b/frame/include/bli_scalar_macro_defs.h @@ -36,6 +36,10 @@ #define BLIS_SCALAR_MACRO_DEFS_H +#include "bli_assigns.h" +#include "bli_complex_terms.h" +#include "bli_constants.h" +#include "bli_declinits.h" // -- Assignment/Accessor macros -- @@ -44,210 +48,42 @@ // whether fields of a struct are set directly or whether native C99 // assignment is used). -#include "bli_sets.h" // sets both real and imaginary components - -// NOTE: These macros are not used by other scalar macros, but they are -// related to those defined in bli_sets.h, and so we #include them here. - -#include "bli_setrs.h" // sets real component only -#include "bli_setis.h" // sets imaginary component only +#include "bli_tsets.h" // sets both real and imaginary components // NOTE: This macro also needs to be defined early on since it determines // how real and imaginary components are accessed (ie: whether the fields // of a struct are read directly or whether native C99 functions are used.) 
-#include "bli_gets.h" - - -// -- Scalar constant initialization macros -- - -#include "bli_constants.h" - - -// -- Separated scalar macros (separated real/imaginary values) -- - -#include "bli_absq2ris.h" - -#include "bli_abval2ris.h" - -#include "bli_addris.h" -#include "bli_addjris.h" - -#include "bli_add3ris.h" - -#include "bli_axpbyris.h" -#include "bli_axpbyjris.h" - -#include "bli_axpyris.h" -#include "bli_axpyjris.h" - -#include "bli_axmyris.h" - -#include "bli_conjris.h" - -#include "bli_copyris.h" -#include "bli_copyjris.h" -#include "bli_copycjris.h" - -#include "bli_eqris.h" - -#include "bli_invertris.h" - -#include "bli_invscalris.h" -#include "bli_invscaljris.h" - -#include "bli_neg2ris.h" - -#include "bli_scalris.h" -#include "bli_scaljris.h" -#include "bli_scalcjris.h" - -#include "bli_scal2ris.h" -#include "bli_scal2jris.h" - -#include "bli_set0ris.h" - -#include "bli_sqrt2ris.h" - -#include "bli_subris.h" -#include "bli_subjris.h" - -#include "bli_swapris.h" - -#include "bli_xpbyris.h" -#include "bli_xpbyjris.h" - -// Inlined scalar macros in loops -#include "bli_scal2ris_mxn.h" -#include "bli_scalris_mxn_uplo.h" - - -// -- Conventional scalar macros (paired real/imaginary values) -- - -#include "bli_absq2s.h" - -#include "bli_abval2s.h" - -#include "bli_adds.h" -#include "bli_addjs.h" - -#include "bli_add3s.h" - -#include "bli_axpbys.h" -#include "bli_axpbyjs.h" - -#include "bli_axpys.h" -#include "bli_axpyjs.h" - -#include "bli_axmys.h" - -#include "bli_conjs.h" - -#include "bli_copys.h" -#include "bli_copyjs.h" -#include "bli_copycjs.h" - -#include "bli_copynzs.h" -#include "bli_copyjnzs.h" - -#include "bli_dots.h" -#include "bli_dotjs.h" - -#include "bli_eq.h" -#include "bli_lt.h" -#include "bli_lte.h" - -#include "bli_fprints.h" - -#include "bli_inverts.h" - -#include "bli_invscals.h" -#include "bli_invscaljs.h" - -#include "bli_neg2s.h" - -#include "bli_rands.h" -#include "bli_randnp2s.h" - -#include "bli_scals.h" -#include "bli_scaljs.h" 
-#include "bli_scalcjs.h" - -#include "bli_scal2s.h" -#include "bli_scal2js.h" - -#include "bli_set0s.h" - -#include "bli_set1s.h" - -#include "bli_seti0s.h" - -#include "bli_sqrt2s.h" - -#include "bli_subs.h" -#include "bli_subjs.h" - -#include "bli_swaps.h" - -#include "bli_xpbys.h" -#include "bli_xpbyjs.h" - -// Inlined scalar macros in loops -#include "bli_adds_mxn.h" -#include "bli_adds_mxn_uplo.h" -#include "bli_set0s_mxn.h" -#include "bli_set0s_edge.h" -#include "bli_copys_mxn.h" -#include "bli_scal2s_mxn.h" - -#include "bli_axpbys_mxn.h" -#include "bli_xpbys_mxn.h" -#include "bli_xpbys_mxn_uplo.h" - -// -- "broadcast B" scalar macros -- - -#include "bli_bcastbbs_mxn.h" -#include "bli_scal2bbs_mxn.h" -#include "bli_set0bbs_mxn.h" - - -// -- 1m-specific scalar macros -- - -// 1e -#include "bli_copy1es.h" -#include "bli_copyj1es.h" - -#include "bli_invert1es.h" - -#include "bli_scal1es.h" - -#include "bli_scal21es.h" -#include "bli_scal2j1es.h" - -// 1r -#include "bli_copy1rs.h" -#include "bli_copyj1rs.h" - -#include "bli_invert1rs.h" - -#include "bli_scal1rs.h" - -#include "bli_scal21rs.h" -#include "bli_scal2j1rs.h" - -// 1m (1e or 1r) -#include "bli_invert1ms_mxn_diag.h" - -#include "bli_scal1ms_mxn.h" - -#include "bli_scal21ms_mxn.h" -#include "bli_scal21ms_mxn_diag.h" -#include "bli_scal21ms_mxn_uplo.h" - -#include "bli_set1ms_mxn.h" -#include "bli_set1ms_mxn_diag.h" -#include "bli_set1ms_mxn_uplo.h" -#include "bli_seti01ms_mxn_diag.h" +#include "bli_tgets.h" + +// -- Scalar macros -- + +#include "bli_tabsq2s.h" +#include "bli_tabval2s.h" +#include "bli_tadd3s.h" +#include "bli_tadds.h" +#include "bli_taxpbys.h" +#include "bli_taxpys.h" +#include "bli_tconjs.h" +#include "bli_tcopycjs.h" +#include "bli_tcopynzs.h" +#include "bli_tcopys.h" +#include "bli_tdots.h" +#include "bli_teqs.h" +#include "bli_tfprints.h" +#include "bli_tinverts.h" +#include "bli_tinvscals.h" +#include "bli_tneg2s.h" +#include "bli_trandnp2s.h" +#include "bli_trands.h" +#include 
"bli_tscalcjs.h" +#include "bli_tscal2s.h" +#include "bli_tscals.h" +#include "bli_tsets.h" +#include "bli_tsqrt2s.h" +#include "bli_tsubs.h" +#include "bli_tswaps.h" +#include "bli_txpbys.h" #endif diff --git a/frame/include/level0/1e/bli_copy1es.h b/frame/include/level0/1e/bli_copy1es.h deleted file mode 100644 index 7dc6a493a9..0000000000 --- a/frame/include/level0/1e/bli_copy1es.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPY1ES_H -#define BLIS_COPY1ES_H - -// copy1es - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - -#define bli_sscopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_dscopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_cscopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_zscopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } - -#define bli_sdcopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_ddcopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_cdcopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_zdcopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } - -#define bli_sccopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_dccopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_cccopy1es( a, bri, bir ) \ -{ \ - bli_cccopyris( bli_creal(a), bli_cimag(a), bli_creal(bri), bli_cimag(bri) ); \ - bli_cccopyris( -bli_cimag(a), bli_creal(a), bli_creal(bir), bli_cimag(bir) ); \ -} -#define bli_zccopy1es( a, bri, bir ) \ -{ \ - bli_zccopyris( bli_zreal(a), bli_zimag(a), bli_creal(bri), bli_cimag(bri) ); \ - bli_zccopyris( -bli_zimag(a), bli_zreal(a), bli_creal(bir), bli_cimag(bir) ); \ -} - -#define bli_szcopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } 
-#define bli_dzcopy1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_czcopy1es( a, bri, bir ) \ -{ \ - bli_czcopyris( bli_creal(a), bli_cimag(a), bli_zreal(bri), bli_zimag(bri) ); \ - bli_czcopyris( -bli_cimag(a), bli_creal(a), bli_zreal(bir), bli_zimag(bir) ); \ -} -#define bli_zzcopy1es( a, bri, bir ) \ -{ \ - bli_zzcopyris( bli_zreal(a), bli_zimag(a), bli_zreal(bri), bli_zimag(bri) ); \ - bli_zzcopyris( -bli_zimag(a), bli_zreal(a), bli_zreal(bir), bli_zimag(bir) ); \ -} - - -#define bli_ccopy1es( a, bri, bir ) bli_cccopy1es( a, bri, bir ) -#define bli_zcopy1es( a, bri, bir ) bli_zzcopy1es( a, bri, bir ) - -#endif - diff --git a/frame/include/level0/1e/bli_copyj1es.h b/frame/include/level0/1e/bli_copyj1es.h deleted file mode 100644 index 25bb19d5bf..0000000000 --- a/frame/include/level0/1e/bli_copyj1es.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYJ1ES_H -#define BLIS_COPYJ1ES_H - -// copyj1es - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - -#define bli_sscopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_dscopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_cscopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_zscopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } - -#define bli_sdcopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_ddcopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_cdcopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_zdcopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } - -#define bli_sccopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_dccopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_cccopyj1es( a, bri, bir ) \ -{ \ - bli_cccopyris( bli_creal(a), -bli_cimag(a), bli_creal(bri), bli_cimag(bri) ); \ - bli_cccopyris( bli_cimag(a), bli_creal(a), bli_creal(bir), bli_cimag(bir) ); \ -} -#define bli_zccopyj1es( a, bri, bir ) \ -{ \ - bli_zccopyris( bli_zreal(a), -bli_zimag(a), bli_creal(bri), bli_cimag(bri) ); \ - bli_zccopyris( bli_zimag(a), bli_zreal(a), bli_creal(bir), bli_cimag(bir) ); \ -} - -#define bli_szcopyj1es( a, bri, bir ) { ( void )a; ( void 
)bri; ( void )bir; } -#define bli_dzcopyj1es( a, bri, bir ) { ( void )a; ( void )bri; ( void )bir; } -#define bli_czcopyj1es( a, bri, bir ) \ -{ \ - bli_czcopyris( bli_creal(a), -bli_cimag(a), bli_zreal(bri), bli_zimag(bri) ); \ - bli_czcopyris( bli_cimag(a), bli_creal(a), bli_zreal(bir), bli_zimag(bir) ); \ -} -#define bli_zzcopyj1es( a, bri, bir ) \ -{ \ - bli_zzcopyris( bli_zreal(a), -bli_zimag(a), bli_zreal(bri), bli_zimag(bri) ); \ - bli_zzcopyris( bli_zimag(a), bli_zreal(a), bli_zreal(bir), bli_zimag(bir) ); \ -} - - -#define bli_ccopyj1es( a, bri, bir ) bli_cccopyj1es( a, bri, bir ) -#define bli_zcopyj1es( a, bri, bir ) bli_zzcopyj1es( a, bri, bir ) - -#endif - diff --git a/frame/include/level0/1e/bli_invert1es.h b/frame/include/level0/1e/bli_invert1es.h deleted file mode 100644 index b45c3ca1ff..0000000000 --- a/frame/include/level0/1e/bli_invert1es.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_INVERT1ES_H -#define BLIS_INVERT1ES_H - -// invert1es - -#define bli_cinvert1es( bri, bir ) \ -{ \ - bli_cinvertris( bli_creal(bri), bli_cimag(bri) ); \ - bli_ccopyris( bli_creal(bri), -bli_cimag(bri), bli_cimag(bir), bli_creal(bir) ); \ -} - -#define bli_zinvert1es( bri, bir ) \ -{ \ - bli_zinvertris( bli_zreal(bri), bli_zimag(bri) ); \ - bli_zcopyris( bli_zreal(bri), -bli_zimag(bri), bli_zimag(bir), bli_zreal(bir) ); \ -} - -#endif - diff --git a/frame/include/level0/1e/bli_scal1es.h b/frame/include/level0/1e/bli_scal1es.h deleted file mode 100644 index 485a8ae645..0000000000 --- a/frame/include/level0/1e/bli_scal1es.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCAL1ES_H -#define BLIS_SCAL1ES_H - -// scal1es - -#define bli_cscal1es( a, yri, yir ) \ -{ \ - bli_cscalris( bli_creal(a), bli_cimag(a), bli_creal(yri), bli_cimag(yri) ); \ - bli_ccopyris( -bli_cimag(yri), bli_creal(yri), bli_creal(yir), bli_cimag(yir) ); \ -} - -#define bli_zscal1es( a, yri, yir ) \ -{ \ - bli_zscalris( bli_zreal(a), bli_zimag(a), bli_zreal(yri), bli_zimag(yri) ); \ - bli_zcopyris( -bli_zimag(yri), bli_zreal(yri), bli_zreal(yir), bli_zimag(yir) ); \ -} - -#endif - diff --git a/frame/include/level0/1e/bli_scal21es.h b/frame/include/level0/1e/bli_scal21es.h deleted file mode 100644 index 1cce973993..0000000000 --- a/frame/include/level0/1e/bli_scal21es.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL21ES_H -#define BLIS_SCAL21ES_H - -// scal21es - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. - -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sdsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_scsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_szsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_dssscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ddsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dcsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dzsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_cssscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_cdsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ccsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_czsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define 
bli_zssscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zdsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zcsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zzsscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sddscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_scdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_szdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_dsdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dddscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dcdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dzdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_csdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_cddscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ccdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_czdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_zsdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zddscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zcdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zzdscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -// -- (axy) = 
(??c) ------------------------------------------------------------ - -#define bli_sscscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sdcscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sccscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), -bli_cimag(x), bli_creal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_szcscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), -bli_zimag(x), bli_zreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} - -#define bli_dscscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ddcscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dccscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), -bli_cimag(x), bli_creal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_dzcscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), -bli_zimag(x), bli_zreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} - -#define bli_cscscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_simag(x), bli_sreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_cdcscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(yri), bli_cimag(yri) ); \ - 
bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_dimag(x), bli_dreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_cccscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_cimag(x), bli_creal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_czcscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_zimag(x), bli_zreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} - -#define bli_zscscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_simag(x), bli_sreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_zdcscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_dimag(x), bli_dreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_zccscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_cimag(x), bli_creal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_zzcscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_zimag(x), bli_zreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sdzscal21es( a, x, yri, yir ) { ( void )a; ( 
void )x; ( void )yri; ( void )yir; } -#define bli_sczscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_szzscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} - -#define bli_dszscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ddzscal21es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dczscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_dzzscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} - -#define bli_cszscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_cdzscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_cczscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), 
bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_czzscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} - -#define bli_zszscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_zdzscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_zczscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_zzzscal21es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} - - - -#define bli_cscal21es( a, x, yri, yir ) bli_cccscal21es( a, x, yri, yir ) -#define bli_zscal21es( a, x, yri, yir ) bli_zzzscal21es( a, x, yri, yir ) - -#endif - diff --git a/frame/include/level0/1e/bli_scal2j1es.h b/frame/include/level0/1e/bli_scal2j1es.h deleted file mode 100644 index d868f6fb72..0000000000 --- a/frame/include/level0/1e/bli_scal2j1es.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like 
- libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2J1ES_H -#define BLIS_SCAL2J1ES_H - -// scal2j1es - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. 
- -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sdsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_scsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_szsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_dssscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ddsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dcsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dzsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_cssscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_cdsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ccsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_czsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_zssscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zdsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zcsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zzsscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sddscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_scdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; 
} -#define bli_szdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_dsdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dddscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dcdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dzdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_csdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_cddscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ccdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_czdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -#define bli_zsdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zddscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zcdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_zzdscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sdcscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sccscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), -bli_cimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_cimag(x), bli_creal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_szcscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), -bli_zimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( 
bli_sreal(a), bli_simag(a), bli_zimag(x), bli_zreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} - -#define bli_dscscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ddcscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dccscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), -bli_cimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_cimag(x), bli_creal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_dzcscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), -bli_zimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zimag(x), bli_zreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} - -#define bli_cscscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), -bli_simag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_simag(x), bli_sreal(x), bli_creal(yir), bli_zimag(yir) ); \ -} -#define bli_cdcscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), -bli_dimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dimag(x), bli_dreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_cccscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), -bli_cimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_cimag(x), bli_creal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_czcscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), -bli_zimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zimag(x), bli_zreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} - -#define bli_zscscal2j1es( a, x, yri, yir ) \ -{ \ - 
bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), -bli_simag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_simag(x), bli_sreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_zdcscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), -bli_dimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dimag(x), bli_dreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_zccscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), -bli_cimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_cimag(x), bli_creal(x), bli_creal(yir), bli_cimag(yir) ); \ -} -#define bli_zzcscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), -bli_zimag(x), bli_creal(yri), bli_cimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zimag(x), bli_zreal(x), bli_creal(yir), bli_cimag(yir) ); \ -} - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sdzscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_sczscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_szzscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} - -#define bli_dszscal2j1es( a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_ddzscal2j1es( 
a, x, yri, yir ) { ( void )a; ( void )x; ( void )yri; ( void )yir; } -#define bli_dczscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_dzzscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} - -#define bli_cszscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), -bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_cdzscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), -bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_cczscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_czzscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} - -#define bli_zszscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), -bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} 
-#define bli_zdzscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), -bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_zczscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} -#define bli_zzzscal2j1es( a, x, yri, yir ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ -} - - - -#define bli_cscal2j1es( a, x, yri, yir ) bli_cccscal2j1es( a, x, yri, yir ) -#define bli_zscal2j1es( a, x, yri, yir ) bli_zzzscal2j1es( a, x, yri, yir ) - -#endif - diff --git a/frame/include/level0/1m/bli_invert1ms_mxn_diag.h b/frame/include/level0/1m/bli_invert1ms_mxn_diag.h deleted file mode 100644 index dfdeb2293a..0000000000 --- a/frame/include/level0/1m/bli_invert1ms_mxn_diag.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_INVERT1MS_MXN_DIAG_H -#define BLIS_INVERT1MS_MXN_DIAG_H - -// invert1ms_mxn_diag - -#define bli_cinvert1ms_mxn_diag( schema, offm, offn, m, n, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t i; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - scomplex* restrict y_off_ri = y + (offm )*rs_y \ - + (offn )*cs_y; \ - scomplex* restrict y_off_ir = y + (offm )*rs_y \ - + (offn )*cs_y + ld_y/2; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_cinvert1es( *(y_off_ri + i*rs_y + i*cs_y), \ - *(y_off_ir + i*rs_y + i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - float* restrict y_cast = ( float* )y; \ - float* restrict y_off_r = y_cast + (offm )*rs_y2 \ - + (offn )*cs_y2; \ - float* restrict y_off_i = y_cast + (offm )*rs_y2 \ - + (offn )*cs_y2 + ld_y; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_cinvert1rs( *(y_off_r + i*rs_y2 + i*cs_y2), \ - *(y_off_i + i*rs_y2 + i*cs_y2) ); \ - } \ - } \ -} - -#define bli_zinvert1ms_mxn_diag( schema, offm, offn, m, n, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t i; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - dcomplex* restrict y_off_ri = y + (offm )*rs_y \ - + (offn )*cs_y; \ - dcomplex* restrict y_off_ir = y + (offm )*rs_y \ - + (offn )*cs_y + ld_y/2; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_zinvert1es( *(y_off_ri + i*rs_y + i*cs_y), \ - *(y_off_ir + i*rs_y + i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. */ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - double* restrict y_cast = ( double* )y; \ - double* restrict y_off_r = y_cast + (offm )*rs_y2 \ - + (offn )*cs_y2; \ - double* restrict y_off_i = y_cast + (offm )*rs_y2 \ - + (offn )*cs_y2 + ld_y; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_zinvert1rs( *(y_off_r + i*rs_y2 + i*cs_y2), \ - *(y_off_i + i*rs_y2 + i*cs_y2) ); \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/1m/bli_scal1ms_mxn.h b/frame/include/level0/1m/bli_scal1ms_mxn.h deleted file mode 100644 index 7d845576d5..0000000000 --- a/frame/include/level0/1m/bli_scal1ms_mxn.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL1MS_MXN_H -#define BLIS_SCAL1MS_MXN_H - -// scal1ms_mxn - -#define bli_cscal1ms_mxn( schema, m, n, a, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t i, j; \ -\ - /* Handle 1e and 1r separately. 
*/ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - scomplex* restrict y_ri = y; \ - scomplex* restrict y_ir = y + ld_y/2; \ -\ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < m; ++i ) \ - { \ - bli_cscal1es( *(a), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. */ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - float* restrict y_cast = ( float* )y; \ - float* restrict y_r = y_cast; \ - float* restrict y_i = y_cast + ld_y; \ -\ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < m; ++i ) \ - { \ - bli_cscal1rs( *(a), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ -} - -#define bli_zscal1ms_mxn( schema, m, n, a, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t i, j; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - dcomplex* restrict y_ri = y; \ - dcomplex* restrict y_ir = y + ld_y/2; \ -\ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < m; ++i ) \ - { \ - bli_zscal1es( *(a), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, - which steps in units of real (not complex) values. 
*/ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - double* restrict y_cast = ( double* )y; \ - double* restrict y_r = y_cast; \ - double* restrict y_i = y_cast + ld_y; \ -\ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < m; ++i ) \ - { \ - bli_zscal1rs( *(a), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/1m/bli_scal21ms_mxn.h b/frame/include/level0/1m/bli_scal21ms_mxn.h deleted file mode 100644 index 9a824fbd5f..0000000000 --- a/frame/include/level0/1m/bli_scal21ms_mxn.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL21MS_MXN_H -#define BLIS_SCAL21MS_MXN_H - -// scal21ms_mxn - -BLIS_INLINE void bli_cscal21ms_mxn - ( - const pack_t schema, - const conj_t conjx, - const dim_t m, - const dim_t n, - scomplex* restrict alpha, - scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t ld_y - ) -{ - dim_t i, j; - - /* Handle 1e and 1r separately. */ - if ( bli_is_1e_packed( schema ) ) - { - scomplex* restrict y_ri = y; - scomplex* restrict y_ir = y + ld_y/2; - - if ( bli_is_conj( conjx ) ) - { - for ( j = 0; j < n; ++j ) - for ( i = 0; i < m; ++i ) - { - bli_cscal2j1es( *(alpha), - *(x + i*rs_x + j*cs_x), - *(y_ri + i*rs_y + j*cs_y), - *(y_ir + i*rs_y + j*cs_y) ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( j = 0; j < n; ++j ) - for ( i = 0; i < m; ++i ) - { - bli_cscal21es( *(alpha), - *(x + i*rs_x + j*cs_x), - *(y_ri + i*rs_y + j*cs_y), - *(y_ir + i*rs_y + j*cs_y) ); - } - } - } - else /* if ( bli_is_1r_packed( schema ) ) */ - { - inc_t rs_y2 = rs_y; - inc_t cs_y2 = cs_y; - - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } - - float* restrict y_cast = ( float* )y; - float* restrict y_r = y_cast; - float* restrict y_i = y_cast + ld_y; - - if ( bli_is_conj( conjx ) ) - { - for ( j = 0; j < n; ++j ) - for ( i = 0; i < m; ++i ) - { - bli_cscal2j1rs( *(alpha), - *(x + i*rs_x + j*cs_x ), - *(y_r + i*rs_y2 + j*cs_y2), - *(y_i + i*rs_y2 + j*cs_y2) ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( j = 0; j < n; ++j ) - for ( i = 0; i < m; ++i ) - { - bli_cscal21rs( *(alpha), - *(x + i*rs_x + j*cs_x ), - *(y_r + i*rs_y2 + j*cs_y2), - *(y_i + i*rs_y2 + j*cs_y2) ); - } - } - } -} - -BLIS_INLINE void bli_zscal21ms_mxn - ( - const pack_t schema, - const conj_t conjx, - const dim_t m, - const dim_t n, - dcomplex* restrict alpha, - dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t ld_y - ) -{ - dim_t i, j; - - /* Handle 1e and 1r separately. */ - if ( bli_is_1e_packed( schema ) ) - { - dcomplex* restrict y_ri = y; - dcomplex* restrict y_ir = y + ld_y/2; - - if ( bli_is_conj( conjx ) ) - { - for ( j = 0; j < n; ++j ) - for ( i = 0; i < m; ++i ) - { - bli_zscal2j1es( *(alpha), - *(x + i*rs_x + j*cs_x), - *(y_ri + i*rs_y + j*cs_y), - *(y_ir + i*rs_y + j*cs_y) ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( j = 0; j < n; ++j ) - for ( i = 0; i < m; ++i ) - { - bli_zscal21es( *(alpha), - *(x + i*rs_x + j*cs_x), - *(y_ri + i*rs_y + j*cs_y), - *(y_ir + i*rs_y + j*cs_y) ); - } - } - } - else /* if ( bli_is_1r_packed( schema ) ) */ - { - inc_t rs_y2 = rs_y; - inc_t cs_y2 = cs_y; - - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } - - double* restrict y_cast = ( double* )y; - double* restrict y_r = y_cast; - double* restrict y_i = y_cast + ld_y; - - if ( bli_is_conj( conjx ) ) - { - for ( j = 0; j < n; ++j ) - for ( i = 0; i < m; ++i ) - { - bli_zscal2j1rs( *(alpha), - *(x + i*rs_x + j*cs_x ), - *(y_r + i*rs_y2 + j*cs_y2), - *(y_i + i*rs_y2 + j*cs_y2) ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( j = 0; j < n; ++j ) - for ( i = 0; i < m; ++i ) - { - bli_zscal21rs( *(alpha), - *(x + i*rs_x + j*cs_x ), - *(y_r + i*rs_y2 + j*cs_y2), - *(y_i + i*rs_y2 + j*cs_y2) ); - } - } - } -} - -#endif diff --git a/frame/include/level0/1m/bli_scal21ms_mxn_diag.h b/frame/include/level0/1m/bli_scal21ms_mxn_diag.h deleted file mode 100644 index 21074338da..0000000000 --- a/frame/include/level0/1m/bli_scal21ms_mxn_diag.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL21MS_MXN_DIAG_H -#define BLIS_SCAL21MS_MXN_DIAG_H - -// scal21ms_mxn_diag - -#define bli_cscscal21ms_mxn_diag( schema, m, n, a, x, rs_x, cs_x, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t i; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - scomplex* restrict y_off_ri = y; \ - scomplex* restrict y_off_ir = y + ld_y/2; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_cscscal21es( *(a), \ - *(x + i*rs_x + i*cs_x), \ - *(y_off_ri + i*rs_y + i*cs_y), \ - *(y_off_ir + i*rs_y + i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - float* restrict y_cast = ( float* )y; \ - float* restrict y_off_r = y_cast; \ - float* restrict y_off_i = y_cast + ld_y; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_cscscal21rs( *(a), \ - *(x + i*rs_x + i*cs_x ), \ - *(y_off_r + i*rs_y2 + i*cs_y2), \ - *(y_off_i + i*rs_y2 + i*cs_y2) ); \ - } \ - } \ -} - -#define bli_zdzscal21ms_mxn_diag( schema, m, n, a, x, rs_x, cs_x, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t i; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - dcomplex* restrict y_off_ri = y; \ - dcomplex* restrict y_off_ir = y + ld_y/2; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_zdzscal21es( *(a), \ - *(x + i*rs_x + i*cs_x), \ - *(y_off_ri + i*rs_y + i*cs_y), \ - *(y_off_ir + i*rs_y + i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. */ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - double* restrict y_cast = ( double* )y; \ - double* restrict y_off_r = y_cast; \ - double* restrict y_off_i = y_cast + ld_y; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_zdzscal21rs( *(a), \ - *(x + i*rs_x + i*cs_x ), \ - *(y_off_r + i*rs_y2 + i*cs_y2), \ - *(y_off_i + i*rs_y2 + i*cs_y2) ); \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/1m/bli_scal21ms_mxn_uplo.h b/frame/include/level0/1m/bli_scal21ms_mxn_uplo.h deleted file mode 100644 index a41d3e57f7..0000000000 --- a/frame/include/level0/1m/bli_scal21ms_mxn_uplo.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL21MS_MXN_UPLO_H -#define BLIS_SCAL21MS_MXN_UPLO_H - -// scal21ms_mxn_uplo - -#define bli_cscal21ms_mxn_uplo( schema, uplo, conjx, m, a, x, rs_x, cs_x, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t i, j; \ -\ - /* Handle 1e and 1r separately. 
*/ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - scomplex* restrict y_ri = y; \ - scomplex* restrict y_ir = y + ld_y/2; \ -\ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_cscal2j1es( *(a), \ - *(x + i*rs_x + j*cs_x), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_cscal21es( *(a), \ - *(x + i*rs_x + j*cs_x), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_cscal2j1es( *(a), \ - *(x + i*rs_x + j*cs_x), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_cscal21es( *(a), \ - *(x + i*rs_x + j*cs_x), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - float* restrict y_cast = ( float* )y; \ - float* restrict y_r = y_cast; \ - float* restrict y_i = y_cast + ld_y; \ -\ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_cscal2j1rs( *(a), \ - *(x + i*rs_x + j*cs_x ), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_cscal21rs( *(a), \ - *(x + i*rs_x + j*cs_x ), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_cscal2j1rs( *(a), \ - *(x + i*rs_x + j*cs_x ), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_cscal21rs( *(a), \ - *(x + i*rs_x + j*cs_x ), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - } \ - } \ -} - -#define bli_zscal21ms_mxn_uplo( schema, uplo, conjx, m, a, x, rs_x, cs_x, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t i, j; \ -\ - /* Handle 1e and 1r separately. 
*/ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - dcomplex* restrict y_ri = y; \ - dcomplex* restrict y_ir = y + ld_y/2; \ -\ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_zscal2j1es( *(a), \ - *(x + i*rs_x + j*cs_x), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_zscal21es( *(a), \ - *(x + i*rs_x + j*cs_x), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_zscal2j1es( *(a), \ - *(x + i*rs_x + j*cs_x), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_zscal21es( *(a), \ - *(x + i*rs_x + j*cs_x), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - double* restrict y_cast = ( double* )y; \ - double* restrict y_r = y_cast; \ - double* restrict y_i = y_cast + ld_y; \ -\ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_zscal2j1rs( *(a), \ - *(x + i*rs_x + j*cs_x ), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_zscal21rs( *(a), \ - *(x + i*rs_x + j*cs_x ), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_zscal2j1rs( *(a), \ - *(x + i*rs_x + j*cs_x ), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( j = 0; j < m; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_zscal21rs( *(a), \ - *(x + i*rs_x + j*cs_x ), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/1m/bli_set1ms_mxn.h b/frame/include/level0/1m/bli_set1ms_mxn.h deleted file mode 100644 index f7d492c234..0000000000 --- a/frame/include/level0/1m/bli_set1ms_mxn.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET1MS_MXN_H -#define BLIS_SET1MS_MXN_H - -// set1ms_mxn - -#define bli_sset1ms_mxn( schema, offm, offn, m, n, a, y, rs_y, cs_y, ld_y ) \ -{ \ - /* Include real domain version to facilitate macro-izing mixed-datatype - components of packm. */ \ -} - -#define bli_dset1ms_mxn( schema, offm, offn, m, n, a, y, rs_y, cs_y, ld_y ) \ -{ \ - /* Include real domain version to facilitate macro-izing mixed-datatype - components of packm. 
*/ \ -} - -BLIS_INLINE void bli_cset1ms_mxn - ( - const pack_t schema, - const dim_t offm, - const dim_t offn, - const dim_t m, - const dim_t n, - scomplex* restrict alpha, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t ld_y - ) -{ - inc_t offm_local = offm; - inc_t offn_local = offn; - dim_t m_local = m; - dim_t n_local = n; - inc_t rs_y1 = rs_y; - inc_t cs_y1 = cs_y; - inc_t rs_y2 = rs_y; - inc_t cs_y2 = cs_y; - dim_t i, j; - - /* Optimization: The loops walk through y with unit stride if y is - column-stored. If y is row-stored, swap the dimensions and strides - to preserve unit stride movement. */ - if ( cs_y == 1 ) - { - bli_swap_incs( &offm_local, &offn_local ); - bli_swap_dims( &m_local, &n_local ); - bli_swap_incs( &rs_y1, &cs_y1 ); - bli_swap_incs( &rs_y2, &cs_y2 ); - } - - /* Handle 1e and 1r separately. */ - if ( bli_is_1e_packed( schema ) ) - { - scomplex* restrict y_off_ri = y + (offm_local )*rs_y1 - + (offn_local )*cs_y1; - scomplex* restrict y_off_ir = y + (offm_local )*rs_y1 - + (offn_local )*cs_y1 + ld_y/2; - - for ( j = 0; j < n_local; ++j ) - for ( i = 0; i < m_local; ++i ) - { - bli_ccopy1es( *(alpha), - *(y_off_ri + i*rs_y1 + j*cs_y1), - *(y_off_ir + i*rs_y1 + j*cs_y1) ); - } - } - else /* if ( bli_is_1r_packed( schema ) ) */ - { - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } - - float* restrict y_cast = ( float* )y; - float* restrict y_off_r = y_cast + (offm_local )*rs_y2 - + (offn_local )*cs_y2; - float* restrict y_off_i = y_cast + (offm_local )*rs_y2 - + (offn_local )*cs_y2 + ld_y; - - for ( j = 0; j < n_local; ++j ) - for ( i = 0; i < m_local; ++i ) - { - bli_ccopy1rs( *(alpha), - *(y_off_r + i*rs_y2 + j*cs_y2), - *(y_off_i + i*rs_y2 + j*cs_y2) ); - } - } -} - -BLIS_INLINE void bli_zset1ms_mxn - ( - const pack_t schema, - const dim_t offm, - const dim_t offn, - const dim_t m, - const dim_t n, - dcomplex* restrict alpha, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t ld_y - ) -{ - inc_t offm_local = offm; - inc_t offn_local = offn; - dim_t m_local = m; - dim_t n_local = n; - inc_t rs_y1 = rs_y; - inc_t cs_y1 = cs_y; - inc_t rs_y2 = rs_y; - inc_t cs_y2 = cs_y; - dim_t i, j; - - /* Optimization: The loops walk through y with unit stride if y is - column-stored. If y is row-stored, swap the dimensions and strides - to preserve unit stride movement. */ - if ( cs_y == 1 ) - { - bli_swap_incs( &offm_local, &offn_local ); - bli_swap_dims( &m_local, &n_local ); - bli_swap_incs( &rs_y1, &cs_y1 ); - bli_swap_incs( &rs_y2, &cs_y2 ); - } - - /* Handle 1e and 1r separately. */ - if ( bli_is_1e_packed( schema ) ) - { - dcomplex* restrict y_off_ri = y + (offm_local )*rs_y1 - + (offn_local )*cs_y1; - dcomplex* restrict y_off_ir = y + (offm_local )*rs_y1 - + (offn_local )*cs_y1 + ld_y/2; - - for ( j = 0; j < n_local; ++j ) - for ( i = 0; i < m_local; ++i ) - { - bli_zcopy1es( *(alpha), - *(y_off_ri + i*rs_y1 + j*cs_y1), - *(y_off_ir + i*rs_y1 + j*cs_y1) ); - } - } - else /* if ( bli_is_1r_packed( schema ) ) */ - { - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } - - double* restrict y_cast = ( double* )y; - double* restrict y_off_r = y_cast + (offm_local )*rs_y2 - + (offn_local )*cs_y2; - double* restrict y_off_i = y_cast + (offm_local )*rs_y2 - + (offn_local )*cs_y2 + ld_y; - - for ( j = 0; j < n_local; ++j ) - for ( i = 0; i < m_local; ++i ) - { - bli_zcopy1rs( *(alpha), - *(y_off_r + i*rs_y2 + j*cs_y2), - *(y_off_i + i*rs_y2 + j*cs_y2) ); - } - } -} - -#endif diff --git a/frame/include/level0/1m/bli_set1ms_mxn_diag.h b/frame/include/level0/1m/bli_set1ms_mxn_diag.h deleted file mode 100644 index 856e47bce8..0000000000 --- a/frame/include/level0/1m/bli_set1ms_mxn_diag.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET1MS_MXN_DIAG_H -#define BLIS_SET1MS_MXN_DIAG_H - -// set1ms_mxn_diag - -#define bli_cset1ms_mxn_diag( schema, offm, offn, m, n, a, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t i; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - scomplex* restrict y_off_ri = y + (offm )*rs_y \ - + (offn )*cs_y; \ - scomplex* restrict y_off_ir = y + (offm )*rs_y \ - + (offn )*cs_y + ld_y/2; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_ccopy1es( *(a), \ - *(y_off_ri + i*rs_y + i*cs_y), \ - *(y_off_ir + i*rs_y + i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. */ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - float* restrict y_cast = ( float* )y; \ - float* restrict y_off_r = y_cast + (offm )*rs_y2 \ - + (offn )*cs_y2; \ - float* restrict y_off_i = y_cast + (offm )*rs_y2 \ - + (offn )*cs_y2 + ld_y; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_ccopy1rs( *(a), \ - *(y_off_r + i*rs_y2 + i*cs_y2), \ - *(y_off_i + i*rs_y2 + i*cs_y2) ); \ - } \ - } \ -} - -#define bli_zset1ms_mxn_diag( schema, offm, offn, m, n, a, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t i; \ -\ - /* Handle 1e and 1r separately. 
*/ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - dcomplex* restrict y_off_ri = y + (offm )*rs_y \ - + (offn )*cs_y; \ - dcomplex* restrict y_off_ir = y + (offm )*rs_y \ - + (offn )*cs_y + ld_y/2; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_zcopy1es( *(a), \ - *(y_off_ri + i*rs_y + i*cs_y), \ - *(y_off_ir + i*rs_y + i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. */ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - double* restrict y_cast = ( double* )y; \ - double* restrict y_off_r = y_cast + (offm )*rs_y2 \ - + (offn )*cs_y2; \ - double* restrict y_off_i = y_cast + (offm )*rs_y2 \ - + (offn )*cs_y2 + ld_y; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_zcopy1rs( *(a), \ - *(y_off_r + i*rs_y2 + i*cs_y2), \ - *(y_off_i + i*rs_y2 + i*cs_y2) ); \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/1m/bli_set1ms_mxn_uplo.h b/frame/include/level0/1m/bli_set1ms_mxn_uplo.h deleted file mode 100644 index d672b91744..0000000000 --- a/frame/include/level0/1m/bli_set1ms_mxn_uplo.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET1MS_MXN_UPLO_H -#define BLIS_SET1MS_MXN_UPLO_H - -// set1ms_mxn_uplo - -#define bli_cset1ms_mxn_uplo( schema, diagoff, uplo, m, n, a, y, rs_y, cs_y, ld_y ) \ -{ \ - doff_t diagoff_abs = bli_abs( diagoff ); \ - inc_t offdiag_inc; \ - dim_t i, j; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - /* Set the off-diagonal increment. 
*/ \ - if ( diagoff > 0 ) offdiag_inc = cs_y; \ - else /* if ( diagoff < 0 ) */ offdiag_inc = rs_y; \ -\ - scomplex* restrict y0 = y + (diagoff_abs )*offdiag_inc; \ - scomplex* restrict y_ri = y0; \ - scomplex* restrict y_ir = y0 + ld_y/2; \ -\ - if ( bli_is_lower( uplo ) ) \ - { \ - for ( j = 0; j < n; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_ccopy1es( *(a), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_ccopy1es( *(a), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. */ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - /* Set the off-diagonal increment. */ \ - if ( diagoff > 0 ) offdiag_inc = cs_y2; \ - else /* if ( diagoff < 0 ) */ offdiag_inc = rs_y2; \ -\ - float* restrict y0 = ( float* )y + (diagoff_abs )*offdiag_inc; \ - float* restrict y_r = y0; \ - float* restrict y_i = y0 + ld_y; \ -\ - if ( bli_is_lower( uplo ) ) \ - { \ - for ( j = 0; j < n; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_ccopy1rs( *(a), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_ccopy1rs( *(a), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - } \ -} - -#define bli_zset1ms_mxn_uplo( schema, diagoff, uplo, m, n, a, y, rs_y, cs_y, ld_y ) \ -{ \ - doff_t diagoff_abs = bli_abs( diagoff ); \ - inc_t offdiag_inc; \ - dim_t i, j; \ -\ - /* Handle 1e and 1r separately. 
*/ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - /* Set the off-diagonal increment. */ \ - if ( diagoff > 0 ) offdiag_inc = cs_y; \ - else /* if ( diagoff < 0 ) */ offdiag_inc = rs_y; \ -\ - dcomplex* restrict y0 = y + (diagoff_abs )*offdiag_inc; \ - dcomplex* restrict y_ri = y0; \ - dcomplex* restrict y_ir = y0 + ld_y/2; \ -\ - if ( bli_is_lower( uplo ) ) \ - { \ - for ( j = 0; j < n; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_zcopy1es( *(a), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_zcopy1es( *(a), \ - *(y_ri + i*rs_y + j*cs_y), \ - *(y_ir + i*rs_y + j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. */ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - /* Set the off-diagonal increment. 
*/ \ - if ( diagoff > 0 ) offdiag_inc = cs_y2; \ - else /* if ( diagoff < 0 ) */ offdiag_inc = rs_y2; \ -\ - double* restrict y0 = ( double* )y + (diagoff_abs )*offdiag_inc; \ - double* restrict y_r = y0; \ - double* restrict y_i = y0 + ld_y; \ -\ - if ( bli_is_lower( uplo ) ) \ - { \ - for ( j = 0; j < n; ++j ) \ - for ( i = j; i < m; ++i ) \ - { \ - bli_zcopy1rs( *(a), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < j + 1; ++i ) \ - { \ - bli_zcopy1rs( *(a), \ - *(y_r + i*rs_y2 + j*cs_y2), \ - *(y_i + i*rs_y2 + j*cs_y2) ); \ - } \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/1m/bli_seti01ms_mxn_diag.h b/frame/include/level0/1m/bli_seti01ms_mxn_diag.h deleted file mode 100644 index dd8bf7a3b4..0000000000 --- a/frame/include/level0/1m/bli_seti01ms_mxn_diag.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SETI01MS_MXN_DIAG_H -#define BLIS_SETI01MS_MXN_DIAG_H - -// seti01ms_mxn_diag - -#define bli_cseti01ms_mxn_diag( schema, m, n, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t i; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - scomplex* restrict y_off_ri = y; \ - scomplex* restrict y_off_ir = y + ld_y/2; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_cseti0s( *(y_off_ri + i*rs_y + i*cs_y) ); \ - bli_csetr0s( *(y_off_ir + i*rs_y + i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. 
*/ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - float* restrict y_cast = ( float* )y; \ - float* restrict y_off_i = y_cast + ld_y; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_sset0s( *(y_off_i + i*rs_y2 + i*cs_y2) ); \ - } \ - } \ -} - -#define bli_zseti01ms_mxn_diag( schema, m, n, y, rs_y, cs_y, ld_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t i; \ -\ - /* Handle 1e and 1r separately. */ \ - if ( bli_is_1e_packed( schema ) ) \ - { \ - dcomplex* restrict y_off_ri = y; \ - dcomplex* restrict y_off_ir = y + ld_y/2; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_zseti0s( *(y_off_ri + i*rs_y + i*cs_y) ); \ - bli_zsetr0s( *(y_off_ir + i*rs_y + i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_1r_packed( schema ) ) */ \ - { \ - inc_t rs_y2 = rs_y; \ - inc_t cs_y2 = cs_y; \ -\ - /* Scale the non-unit stride by two for the 1r loop, which steps - in units of real (not complex) values. */ \ - if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ - else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ -\ - double* restrict y_cast = ( double* )y; \ - double* restrict y_off_i = y_cast + ld_y; \ -\ - for ( i = 0; i < min_m_n; ++i ) \ - { \ - bli_dset0s( *(y_off_i + i*rs_y2 + i*cs_y2) ); \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/1r/bli_copy1rs.h b/frame/include/level0/1r/bli_copy1rs.h deleted file mode 100644 index 0211497f75..0000000000 --- a/frame/include/level0/1r/bli_copy1rs.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPY1RS_H -#define BLIS_COPY1RS_H - -// copy1rs - -#define bli_ccopy1rs( a, br, bi ) \ -{ \ - bli_ccopyris( bli_creal(a), bli_cimag(a), br, bi ); \ -} - -#define bli_zcopy1rs( a, br, bi ) \ -{ \ - bli_zcopyris( bli_zreal(a), bli_zimag(a), br, bi ); \ -} - -#endif - diff --git a/frame/include/level0/1r/bli_copyj1rs.h b/frame/include/level0/1r/bli_copyj1rs.h deleted file mode 100644 index d7cdff3051..0000000000 --- a/frame/include/level0/1r/bli_copyj1rs.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_COPYJ1RS_H -#define BLIS_COPYJ1RS_H - -// copyj1rs - -#define bli_ccopyj1rs( a, br, bi ) \ -{ \ - bli_ccopyjris( bli_creal(a), bli_cimag(a), br, bi ); \ -} - -#define bli_zcopyj1rs( a, br, bi ) \ -{ \ - bli_zcopyjris( bli_zreal(a), bli_zimag(a), br, bi ); \ -} - -#endif - diff --git a/frame/include/level0/1r/bli_invert1rs.h b/frame/include/level0/1r/bli_invert1rs.h deleted file mode 100644 index 16f7283fd7..0000000000 --- a/frame/include/level0/1r/bli_invert1rs.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_INVERT1RS_H -#define BLIS_INVERT1RS_H - -// invert1rs - -#define bli_cinvert1rs( xr, xi ) bli_cinvertris( xr, xi ) -#define bli_zinvert1rs( xr, xi ) bli_zinvertris( xr, xi ) - -#endif diff --git a/frame/include/level0/1r/bli_scal1rs.h b/frame/include/level0/1r/bli_scal1rs.h deleted file mode 100644 index f75c589d0a..0000000000 --- a/frame/include/level0/1r/bli_scal1rs.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL1RS_H -#define BLIS_SCAL1RS_H - -// scal1rs - -#define bli_cscal1rs( a, yr, yi ) \ -{ \ - bli_cscalris( bli_creal(a), bli_cimag(a), yr, yi ); \ -} - -#define bli_zscal1rs( a, yr, yi ) \ -{ \ - bli_zscalris( bli_zreal(a), bli_zimag(a), yr, yi ); \ -} - -#define bli_scscal1rs( a, yr, yi ) \ -{ \ - bli_scscalris( bli_sreal(a), bli_simag(a), yr, yi ); \ -} - -#define bli_dzscal1rs( a, yr, yi ) \ -{ \ - bli_dzscalris( bli_dreal(a), bli_dimag(a), yr, yi ); \ -} - -#endif - diff --git a/frame/include/level0/bb/bli_bcastbbs_mxn.h b/frame/include/level0/bb/bli_bcastbbs_mxn.h deleted file mode 100644 index d060b767b6..0000000000 --- a/frame/include/level0/bb/bli_bcastbbs_mxn.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_BCASTBBS_MXN_H -#define BLIS_BCASTBBS_MXN_H - -// bcastbbs_mxn - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname ) \ -\ -BLIS_INLINE void PASTEMAC(ch,opname) \ - ( \ - const dim_t m, \ - const dim_t n, \ - ctype* restrict y, const inc_t incy, const inc_t ldy \ - ) \ -{ \ - /* Assume that the duplication factor is the column stride of y. 
*/ \ - const dim_t d = ldy; \ - const dim_t ds_y = 1; \ -\ - for ( dim_t i = 0; i < m; ++i ) \ - { \ - ctype* restrict yi = y + i*incy; \ -\ - for ( dim_t j = 0; j < n; ++j ) \ - { \ - ctype* restrict yij = yi + j*ldy; \ -\ - for ( dim_t p = 1; p < d; ++p ) \ - { \ - ctype* restrict yijd = yij + p*ds_y; \ -\ - PASTEMAC(ch,copys)( *yij, *yijd ); \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( bcastbbs_mxn ) - -#endif diff --git a/frame/include/level0/bb/bli_scal2bbs_mxn.h b/frame/include/level0/bb/bli_scal2bbs_mxn.h deleted file mode 100644 index d6f95f97fe..0000000000 --- a/frame/include/level0/bb/bli_scal2bbs_mxn.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2BBS_MXN_H -#define BLIS_SCAL2BBS_MXN_H - -// scal2bbs_mxn - -#undef GENTFUNCRO -#define GENTFUNCRO( ctype, ch, opname ) \ -\ -BLIS_INLINE void PASTEMAC(ch,opname) \ - ( \ - const conj_t conjx, \ - const dim_t m, \ - const dim_t n, \ - const ctype* restrict alpha, \ - const ctype* restrict x, const inc_t incx, const inc_t ldx, \ - ctype* restrict y, const inc_t incy, const inc_t ldy \ - ) \ -{ \ - /* Assume that the duplication factor is the row stride of y. 
*/ \ - const dim_t d = incy; \ - const dim_t ds_y = 1; \ -\ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( dim_t j = 0; j < n; ++j ) \ - { \ - const ctype* restrict xj = x + j*ldx; \ - ctype* restrict yj = y + j*ldy; \ -\ - for ( dim_t i = 0; i < m; ++i ) \ - { \ - const ctype* restrict xij = xj + i*incx; \ - ctype* restrict yij = yj + i*incy; \ -\ - PASTEMAC(ch,scal2js)( *alpha, *xij, *yij ); \ -\ - for ( dim_t p = 1; p < d; ++p ) \ - { \ - ctype* restrict yijd = yij + p*ds_y; \ -\ - PASTEMAC(ch,copys)( *yij, *yijd ); \ - } \ - } \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( dim_t j = 0; j < n; ++j ) \ - { \ - const ctype* restrict xj = x + j*ldx; \ - ctype* restrict yj = y + j*ldy; \ -\ - for ( dim_t i = 0; i < m; ++i ) \ - { \ - const ctype* restrict xij = xj + i*incx; \ - ctype* restrict yij = yj + i*incy; \ -\ - PASTEMAC(ch,scal2s)( *alpha, *xij, *yij ); \ -\ - for ( dim_t p = 1; p < d; ++p ) \ - { \ - ctype* restrict yijd = yij + p*ds_y; \ -\ - PASTEMAC(ch,copys)( *yij, *yijd ); \ - } \ - } \ - } \ - } \ -} - -INSERT_GENTFUNCRO_BASIC( scal2bbs_mxn ) - - -#undef GENTFUNCCO -#define GENTFUNCCO( ctype, ctype_r, ch, chr, opname ) \ -\ -BLIS_INLINE void PASTEMAC(ch,opname) \ - ( \ - const conj_t conjx, \ - const dim_t m, \ - const dim_t n, \ - const ctype* restrict alpha, \ - const ctype* restrict x, const inc_t incx, const inc_t ldx, \ - ctype* restrict y, const inc_t incy, const inc_t ldy \ - ) \ -{ \ - /* Assume that the duplication factor is the row stride of y. 
*/ \ - const dim_t d = incy; \ - const dim_t ds_y = 1; \ -\ - const inc_t incx2 = 2 * incx; \ - const inc_t ldx2 = 2 * ldx; \ -\ - const inc_t incy2 = 2 * incy; \ - const inc_t ldy2 = 2 * ldy; \ -\ - ctype_r* restrict alpha_r = ( ctype_r* )alpha; \ - ctype_r* restrict alpha_i = ( ctype_r* )alpha + 1; \ - ctype_r* restrict chi_r = ( ctype_r* )x; \ - ctype_r* restrict chi_i = ( ctype_r* )x + 1; \ - ctype_r* restrict psi_r = ( ctype_r* )y; \ - ctype_r* restrict psi_i = ( ctype_r* )y + 1*d; \ -\ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( dim_t j = 0; j < n; ++j ) \ - { \ - ctype_r* restrict chij_r = chi_r + j*ldx2; \ - ctype_r* restrict chij_i = chi_i + j*ldx2; \ - ctype_r* restrict psij_r = psi_r + j*ldy2; \ - ctype_r* restrict psij_i = psi_i + j*ldy2; \ -\ - for ( dim_t i = 0; i < m; ++i ) \ - { \ - ctype_r* restrict chiij_r = chij_r + i*incx2; \ - ctype_r* restrict chiij_i = chij_i + i*incx2; \ - ctype_r* restrict psiij_r = psij_r + i*incy2; \ - ctype_r* restrict psiij_i = psij_i + i*incy2; \ -\ - PASTEMAC(ch,scal2jris)( *alpha_r, *alpha_i, \ - *chiij_r, *chiij_i, \ - *psiij_r, *psiij_i ); \ -\ - for ( dim_t p = 1; p < d; ++p ) \ - { \ - ctype_r* restrict psiijd_r = psiij_r + p*ds_y; \ - ctype_r* restrict psiijd_i = psiij_i + p*ds_y; \ -\ - PASTEMAC(ch,copyris)( *psiij_r, *psiij_i, \ - *psiijd_r, *psiijd_i ); \ - } \ - } \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( dim_t j = 0; j < n; ++j ) \ - { \ - ctype_r* restrict chij_r = chi_r + j*ldx2; \ - ctype_r* restrict chij_i = chi_i + j*ldx2; \ - ctype_r* restrict psij_r = psi_r + j*ldy2; \ - ctype_r* restrict psij_i = psi_i + j*ldy2; \ -\ - for ( dim_t i = 0; i < m; ++i ) \ - { \ - ctype_r* restrict chiij_r = chij_r + i*incx2; \ - ctype_r* restrict chiij_i = chij_i + i*incx2; \ - ctype_r* restrict psiij_r = psij_r + i*incy2; \ - ctype_r* restrict psiij_i = psij_i + i*incy2; \ -\ - PASTEMAC(ch,scal2ris)( *alpha_r, *alpha_i, \ - *chiij_r, *chiij_i, \ - *psiij_r, *psiij_i ); \ -\ - for ( 
dim_t p = 1; p < d; ++p ) \ - { \ - ctype_r* restrict psiijd_r = psiij_r + p*ds_y; \ - ctype_r* restrict psiijd_i = psiij_i + p*ds_y; \ -\ - PASTEMAC(ch,copyris)( *psiij_r, *psiij_i, \ - *psiijd_r, *psiijd_i ); \ - } \ - } \ - } \ - } \ -} - -INSERT_GENTFUNCCO( scal2bbs_mxn ) - -#endif diff --git a/frame/include/level0/bb/bli_set0bbs_mxn.h b/frame/include/level0/bb/bli_set0bbs_mxn.h deleted file mode 100644 index f051218196..0000000000 --- a/frame/include/level0/bb/bli_set0bbs_mxn.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET0BBS_MXN_H -#define BLIS_SET0BBS_MXN_H - -// set0bbs_mxn - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname ) \ -\ -BLIS_INLINE void PASTEMAC(ch,opname) \ - ( \ - const dim_t m, \ - const dim_t n, \ - ctype* restrict y, const inc_t incy, const inc_t ldy \ - ) \ -{ \ - /* Assume that the duplication factor is the row stride of y. */ \ - const dim_t d = incy; \ - const dim_t ds_y = 1; \ -\ - for ( dim_t j = 0; j < n; ++j ) \ - { \ - ctype* restrict yj = y + j*ldy; \ -\ - for ( dim_t i = 0; i < m; ++i ) \ - { \ - ctype* restrict yij = yj + i*incy; \ -\ - for ( dim_t p = 0; p < d; ++p ) \ - { \ - ctype* restrict yijd = yij + p*ds_y; \ -\ - PASTEMAC(ch,set0s)( *yijd ); \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( set0bbs_mxn ) - -#endif diff --git a/frame/include/level0/bli_absq2s.h b/frame/include/level0/bli_absq2s.h deleted file mode 100644 index dee2bea5f6..0000000000 --- a/frame/include/level0/bli_absq2s.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ABSQR2_H -#define BLIS_ABSQR2_H - -// absq2s - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of a. 
- -#define bli_ssabsq2s( x, a ) bli_sabsq2ris( bli_sreal(x), bli_simag(x), bli_sreal(a), 0.0F ) -#define bli_dsabsq2s( x, a ) bli_dabsq2ris( bli_dreal(x), bli_dimag(x), bli_sreal(a), 0.0F ) -#define bli_csabsq2s( x, a ) { float ti; bli_cabsq2ris( bli_creal(x), bli_cimag(x), bli_sreal(a), ti ); ( void )ti; } -#define bli_zsabsq2s( x, a ) { float ti; bli_zabsq2ris( bli_zreal(x), bli_zimag(x), bli_sreal(a), ti ); ( void )ti; } - -#define bli_sdabsq2s( x, a ) bli_sabsq2ris( bli_sreal(x), bli_simag(x), bli_dreal(a), 0.0 ) -#define bli_ddabsq2s( x, a ) bli_dabsq2ris( bli_dreal(x), bli_dimag(x), bli_dreal(a), 0.0 ) -#define bli_cdabsq2s( x, a ) { double ti; bli_cabsq2ris( bli_creal(x), bli_cimag(x), bli_dreal(a), ti ); ( void )ti; } -#define bli_zdabsq2s( x, a ) { double ti; bli_zabsq2ris( bli_zreal(x), bli_zimag(x), bli_dreal(a), ti ); ( void )ti; } - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scabsq2s( x, a ) bli_sabsq2ris( bli_sreal(x), bli_simag(x), bli_creal(a), bli_cimag(a) ) -#define bli_dcabsq2s( x, a ) bli_dabsq2ris( bli_dreal(x), bli_dimag(x), bli_creal(a), bli_cimag(a) ) -#define bli_ccabsq2s( x, a ) bli_cabsq2ris( bli_creal(x), bli_cimag(x), bli_creal(a), bli_cimag(a) ) -#define bli_zcabsq2s( x, a ) bli_zabsq2ris( bli_zreal(x), bli_zimag(x), bli_creal(a), bli_cimag(a) ) - -#define bli_szabsq2s( x, a ) bli_sabsq2ris( bli_sreal(x), bli_simag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_dzabsq2s( x, a ) bli_dabsq2ris( bli_dreal(x), bli_dimag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_czabsq2s( x, a ) bli_cabsq2ris( bli_creal(x), bli_cimag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_zzabsq2s( x, a ) bli_zabsq2ris( bli_zreal(x), bli_zimag(x), bli_zreal(a), bli_zimag(a) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scabsq2s( x, a ) bli_scsets( (x) * (x), 0.0, (a) ) -#define bli_dcabsq2s( x, a ) bli_dcsets( (x) * (x), 0.0, (a) ) -#define bli_ccabsq2s( x, a ) bli_ccsets( bli_creal(x) * bli_creal(x) + \ - bli_cimag(x) * bli_cimag(x), 0.0, (a) ) 
-#define bli_zcabsq2s( x, a ) bli_zcsets( bli_zreal(x) * bli_zreal(x) + \ - bli_zimag(x) * bli_zimag(x), 0.0, (a) ) - -#define bli_szabsq2s( x, a ) bli_szsets( (x) * (x), 0.0, (a) ) -#define bli_dzabsq2s( x, a ) bli_dzsets( (x) * (x), 0.0, (a) ) -#define bli_czabsq2s( x, a ) bli_czsets( bli_creal(x) * bli_creal(x) + \ - bli_cimag(x) * bli_cimag(x), 0.0, (a) ) -#define bli_zzabsq2s( x, a ) bli_zzsets( bli_zreal(x) * bli_zreal(x) + \ - bli_zimag(x) * bli_zimag(x), 0.0, (a) ) - -#endif // BLIS_ENABLE_C99_COMPLEX - -#define bli_sabsq2s( x, a ) bli_ssabsq2s( x, a ) -#define bli_dabsq2s( x, a ) bli_ddabsq2s( x, a ) -#define bli_cabsq2s( x, a ) bli_ccabsq2s( x, a ) -#define bli_zabsq2s( x, a ) bli_zzabsq2s( x, a ) - - -#endif diff --git a/frame/include/level0/bli_abval2s.h b/frame/include/level0/bli_abval2s.h deleted file mode 100644 index 63df867dfa..0000000000 --- a/frame/include/level0/bli_abval2s.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ABVAL2S_H -#define BLIS_ABVAL2S_H - -// abval2s - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of a. - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_ssabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_sreal(a), 0.0F ) -#define bli_dsabval2s( x, a ) bli_dabval2ris( bli_dreal(x), bli_dimag(x), bli_sreal(a), 0.0F ) -#define bli_csabval2s( x, a ) { float ti; bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_sreal(a), ti ); ( void )ti; } -#define bli_zsabval2s( x, a ) { float ti; bli_zabval2ris( bli_zreal(x), bli_zimag(x), bli_sreal(a), ti ); ( void )ti; } - -#define bli_sdabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_dreal(a), 0.0 ) -#define bli_ddabval2s( x, a ) bli_dabval2ris( bli_dreal(x), bli_dimag(x), bli_dreal(a), 0.0 ) -#define bli_cdabval2s( x, a ) { double ti; bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_dreal(a), ti ); ( void )ti; } -#define bli_zdabval2s( x, a ) { double ti; bli_zabval2ris( bli_zreal(x), bli_zimag(x), bli_dreal(a), ti ); ( void )ti; } - -#define bli_scabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_creal(a), bli_cimag(a) ) -#define bli_dcabval2s( x, a ) 
bli_dabval2ris( bli_dreal(x), bli_dimag(x), bli_creal(a), bli_cimag(a) ) -#define bli_ccabval2s( x, a ) bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_creal(a), bli_cimag(a) ) -#define bli_zcabval2s( x, a ) bli_zabval2ris( bli_zreal(x), bli_zimag(x), bli_creal(a), bli_cimag(a) ) - -#define bli_szabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_dzabval2s( x, a ) bli_dabval2ris( bli_dreal(x), bli_dimag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_czabval2s( x, a ) bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_zzabval2s( x, a ) bli_zabval2ris( bli_zreal(x), bli_zimag(x), bli_zreal(a), bli_zimag(a) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_ssabval2s( x, a ) bli_sssets( fabsf(x), 0.0, (a) ) -#define bli_dsabval2s( x, a ) bli_dssets( fabs (x), 0.0, (a) ) -#define bli_csabval2s( x, a ) bli_cssets( cabsf(x), 0.0, (a) ) -#define bli_zsabval2s( x, a ) bli_zssets( cabs (x), 0.0, (a) ) - -#define bli_sdabval2s( x, a ) bli_sdsets( fabsf(x), 0.0, (a) ) -#define bli_ddabval2s( x, a ) bli_ddsets( fabs (x), 0.0, (a) ) -#define bli_cdabval2s( x, a ) bli_cdsets( cabsf(x), 0.0, (a) ) -#define bli_zdabval2s( x, a ) bli_zdsets( cabs (x), 0.0, (a) ) - -#define bli_scabval2s( x, a ) bli_scsets( fabsf(x), 0.0, (a) ) -#define bli_dcabval2s( x, a ) bli_dcsets( fabs (x), 0.0, (a) ) -#define bli_ccabval2s( x, a ) bli_ccsets( cabsf(x), 0.0, (a) ) -#define bli_zcabval2s( x, a ) bli_zcsets( cabs (x), 0.0, (a) ) - -#define bli_szabval2s( x, a ) bli_szsets( fabsf(x), 0.0, (a) ) -#define bli_dzabval2s( x, a ) bli_dzsets( fabs (x), 0.0, (a) ) -#define bli_czabval2s( x, a ) bli_czsets( cabsf(x), 0.0, (a) ) -#define bli_zzabval2s( x, a ) bli_zzsets( cabs (x), 0.0, (a) ) - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sabval2s( x, a ) bli_ssabval2s( x, a ) -#define bli_dabval2s( x, a ) bli_ddabval2s( x, a ) -#define bli_cabval2s( x, a ) bli_ccabval2s( x, a ) -#define bli_zabval2s( x, a ) 
bli_zzabval2s( x, a ) - - -#endif diff --git a/frame/include/level0/bli_add3s.h b/frame/include/level0/bli_add3s.h deleted file mode 100644 index f922a791b9..0000000000 --- a/frame/include/level0/bli_add3s.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ADD3S_H -#define BLIS_ADD3S_H - -// add3s - -// Notes: -// - The first char encodes the type of a. 
-// - The second char encodes the type of b. -// - The third char encodes the type of c. - - -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) ) -#define bli_dssadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) ) -#define bli_cssadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) ) -#define bli_zssadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) ) - -#define bli_sdsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_ddsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_cdsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_zdsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) ) - -#define bli_scsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_dcsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_ccsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_zcsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) ) - -#define bli_szsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_dzsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), 
bli_zimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_czsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) ) -#define bli_zzsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) ) - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_dsdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_csdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_zsdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) ) - -#define bli_sddadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_dddadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_cddadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_zddadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) ) - -#define bli_scdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_dcdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_ccdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_zcdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) ) - -#define 
bli_szdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_dzdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_czdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) ) -#define bli_zzdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) ) -#define bli_dscadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) ) -#define bli_cscadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) ) -#define bli_zscadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) ) - -#define bli_sdcadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_ddcadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_cdcadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_zdcadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) ) - -#define bli_sccadd3s( a, b, c ) bli_cadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_dccadd3s( a, b, c ) bli_cadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_cccadd3s( a, b, c ) 
bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_zccadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) ) - -#define bli_szcadd3s( a, b, c ) bli_cadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_dzcadd3s( a, b, c ) bli_cadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_czcadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) ) -#define bli_zzcadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) ) - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_dszadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_cszadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_zszadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) ) - -#define bli_sdzadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_ddzadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_cdzadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_zdzadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) ) - -#define bli_sczadd3s( a, b, c ) bli_zadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), 
bli_cimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_dczadd3s( a, b, c ) bli_zadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_cczadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_zczadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) ) - -#define bli_szzadd3s( a, b, c ) bli_zadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_dzzadd3s( a, b, c ) bli_zadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_czzadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) ) -#define bli_zzzadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_dscadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_cscadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_zscadd3s( a, b, c ) { (c) = (a) + (b); } - -#define bli_sdcadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_ddcadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_cdcadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_zdcadd3s( a, b, c ) { (c) = (a) + (b); } - -#define bli_sccadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_dccadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_cccadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_zccadd3s( a, b, c ) { (c) = (a) + (b); } - -#define bli_szcadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_dzcadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_czcadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_zzcadd3s( a, b, c ) { (c) = (a) + (b); } - -// -- (axy) = (??z) 
------------------------------------------------------------ - -#define bli_sszadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_dszadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_cszadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_zszadd3s( a, b, c ) { (c) = (a) + (b); } - -#define bli_sdzadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_ddzadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_cdzadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_zdzadd3s( a, b, c ) { (c) = (a) + (b); } - -#define bli_sczadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_dczadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_cczadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_zczadd3s( a, b, c ) { (c) = (a) + (b); } - -#define bli_szzadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_dzzadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_czzadd3s( a, b, c ) { (c) = (a) + (b); } -#define bli_zzzadd3s( a, b, c ) { (c) = (a) + (b); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sadd3s( a, b, c ) bli_sssadd3s( a, b, c ) -#define bli_dadd3s( a, b, c ) bli_dddadd3s( a, b, c ) -#define bli_cadd3s( a, b, c ) bli_cccadd3s( a, b, c ) -#define bli_zadd3s( a, b, c ) bli_zzzadd3s( a, b, c ) - - -#endif - diff --git a/frame/include/level0/bli_addjs.h b/frame/include/level0/bli_addjs.h deleted file mode 100644 index abe570c536..0000000000 --- a/frame/include/level0/bli_addjs.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ADDJS_H -#define BLIS_ADDJS_H - -// addjs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of y. 
- -#define bli_ssaddjs( a, y ) bli_saddjris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) -#define bli_dsaddjs( a, y ) bli_saddjris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_csaddjs( a, y ) bli_saddjris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_zsaddjs( a, y ) bli_saddjris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) - -#define bli_sdaddjs( a, y ) bli_daddjris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_ddaddjs( a, y ) bli_daddjris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_cdaddjs( a, y ) bli_daddjris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_zdaddjs( a, y ) bli_daddjris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scaddjs( a, y ) bli_caddjris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) -#define bli_dcaddjs( a, y ) bli_caddjris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_ccaddjs( a, y ) bli_caddjris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_zcaddjs( a, y ) bli_caddjris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) - -#define bli_szaddjs( a, y ) bli_zaddjris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_dzaddjs( a, y ) bli_zaddjris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_czaddjs( a, y ) bli_zaddjris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_zzaddjs( a, y ) bli_zaddjris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scaddjs( a, y ) { (y) += (a); } -#define bli_dcaddjs( a, y ) { (y) += (a); } -#define bli_ccaddjs( a, y ) { (y) += conjf(a); } -#define bli_zcaddjs( a, y ) { (y) += conj (a); } - -#define bli_szaddjs( a, y ) { (y) += (a); } -#define bli_dzaddjs( a, y ) { (y) += (a); } -#define bli_czaddjs( a, y ) { 
(y) += conjf(a); } -#define bli_zzaddjs( a, y ) { (y) += conj (a); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_saddjs( a, y ) bli_ssaddjs( a, y ) -#define bli_daddjs( a, y ) bli_ddaddjs( a, y ) -#define bli_caddjs( a, y ) bli_ccaddjs( a, y ) -#define bli_zaddjs( a, y ) bli_zzaddjs( a, y ) - - -#endif - diff --git a/frame/include/level0/bli_adds.h b/frame/include/level0/bli_adds.h deleted file mode 100644 index 340f2c3e08..0000000000 --- a/frame/include/level0/bli_adds.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ADDS_H -#define BLIS_ADDS_H - -// adds - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of y. - -#define bli_ssadds( a, y ) bli_saddris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) -#define bli_dsadds( a, y ) bli_saddris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_csadds( a, y ) bli_saddris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_zsadds( a, y ) bli_saddris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) - -#define bli_sdadds( a, y ) bli_daddris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_ddadds( a, y ) bli_daddris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_cdadds( a, y ) bli_daddris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_zdadds( a, y ) bli_daddris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scadds( a, y ) bli_caddris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) -#define bli_dcadds( a, y ) bli_caddris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_ccadds( a, y ) bli_caddris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_zcadds( a, y ) bli_caddris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) - -#define bli_szadds( a, y ) bli_zaddris( bli_sreal(a), bli_simag(a), bli_zreal(y), 
bli_zimag(y) ) -#define bli_dzadds( a, y ) bli_zaddris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_czadds( a, y ) bli_zaddris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_zzadds( a, y ) bli_zaddris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scadds( a, y ) { (y) += (a); } -#define bli_dcadds( a, y ) { (y) += (a); } -#define bli_ccadds( a, y ) { (y) += (a); } -#define bli_zcadds( a, y ) { (y) += (a); } - -#define bli_szadds( a, y ) { (y) += (a); } -#define bli_dzadds( a, y ) { (y) += (a); } -#define bli_czadds( a, y ) { (y) += (a); } -#define bli_zzadds( a, y ) { (y) += (a); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sadds( a, y ) bli_ssadds( a, y ) -#define bli_dadds( a, y ) bli_ddadds( a, y ) -#define bli_cadds( a, y ) bli_ccadds( a, y ) -#define bli_zadds( a, y ) bli_zzadds( a, y ) - - -#endif - diff --git a/frame/include/level0/bli_adds_mxn.h b/frame/include/level0/bli_adds_mxn.h deleted file mode 100644 index 8a92a17a63..0000000000 --- a/frame/include/level0/bli_adds_mxn.h +++ /dev/null @@ -1,513 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ADDS_MXN_H -#define BLIS_ADDS_MXN_H - -// adds_mxn - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- - -// xy = ?s - -BLIS_INLINE void bli_ssadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_ssadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_ssadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_ssadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dsadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dsadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dsadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dsadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_csadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_csadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_csadds( 
*(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_csadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zsadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zsadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zsadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zsadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// xy = ?d - -BLIS_INLINE void bli_sdadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sdadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_sdadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sdadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_ddadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( 
dim_t ii = 0; ii < m; ++ii ) - bli_ddadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_ddadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_ddadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_cdadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cdadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_cdadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cdadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zdadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zdadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zdadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zdadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// xy = ?c - -BLIS_INLINE void bli_scadds_mxn( const dim_t m, const dim_t n, 
float* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_scadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_scadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_scadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dcadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dcadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dcadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dcadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_ccadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_ccadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_ccadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for 
( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_ccadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zcadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zcadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zcadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zcadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// xy = ?z - -BLIS_INLINE void bli_szadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_szadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_szadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_szadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dzadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dzadds( *(x + ii + jj*cs_x), - *(y + 
ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dzadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dzadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_czadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_czadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_czadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_czadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zzadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zzadds( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zzadds( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zzadds( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} - - - -BLIS_INLINE void bli_sadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const 
inc_t rs_y, const inc_t cs_y ) -{ - bli_ssadds_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); -} -BLIS_INLINE void bli_dadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ - bli_ddadds_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); -} -BLIS_INLINE void bli_cadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ - bli_ccadds_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); -} -BLIS_INLINE void bli_zadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ - bli_zzadds_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); -} - - -#endif diff --git a/frame/include/level0/bli_adds_mxn_uplo.h b/frame/include/level0/bli_adds_mxn_uplo.h deleted file mode 100644 index 29f0ee038c..0000000000 --- a/frame/include/level0/bli_adds_mxn_uplo.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ADDS_MXN_UPLO_H -#define BLIS_ADDS_MXN_UPLO_H - -// adds_mxn_u - -#define bli_ssadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - { \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_ssadds( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ -} - -#define bli_ddadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - { \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_ddadds( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ -} - -#define bli_ccadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - { \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_ccadds( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ -} - -#define bli_zzadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - { \ - for ( 
_i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_zzadds( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ -} - -// adds_mxn_l - -#define bli_ssadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - { \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_ssadds( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ -} - -#define bli_ddadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - { \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_ddadds( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ -} - -#define bli_ccadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - { \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_ccadds( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ -} - -#define bli_zzadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - { \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_zzadds( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ -} - - -#define bli_sadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - bli_ssadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} -#define bli_dadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - bli_ddadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} -#define bli_cadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - bli_ccadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} -#define bli_zadds_mxn_u( diagoff, 
m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - bli_zzadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} -#define bli_sadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - bli_ssadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} -#define bli_dadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - bli_ddadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} -#define bli_cadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - bli_ccadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} -#define bli_zadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ -{ \ - bli_zzadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} - -#endif diff --git a/frame/include/level0/old/ri3/bli_copyri3s.h b/frame/include/level0/bli_assigns.h similarity index 77% rename from frame/include/level0/old/ri3/bli_copyri3s.h rename to frame/include/level0/bli_assigns.h index 86ec79b0a8..97a79b299b 100644 --- a/frame/include/level0/old/ri3/bli_copyri3s.h +++ b/frame/include/level0/bli_assigns.h @@ -32,34 +32,21 @@ */ -#ifndef BLIS_COPYRI3S_H -#define BLIS_COPYRI3S_H +#ifndef BLIS_ASSIGNS_H +#define BLIS_ASSIGNS_H -// copyri3s +// assigns -#define bli_scopyri3s( ar, ai, br, bi, bri ) \ -{ \ - (br) = (ar); \ -} +// Notes: +// - The first char encodes the domain of output y. +// - These macros are used to avoid updates on an output's imaginary part +// when that output exists only in the real domain (i.e. has no imaginary +// part to begin with). 
-#define bli_dcopyri3s( ar, ai, br, bi, bri ) \ -{ \ - (br) = (ar); \ -} +#define bli_rassigns( xr, xi, yr, yi ) { yr = xr; } +#define bli_cassigns( xr, xi, yr, yi ) { yr = xr; yi = xi; } +#define bli_jassigns( xr, xi, yr, yi ) { yi = xi; } -#define bli_ccopyri3s( ar, ai, br, bi, bri ) \ -{ \ - (br) = (ar); \ - (bi) = (ai); \ - (bri) = (ar) + (ai); \ -} - -#define bli_zcopyri3s( ar, ai, br, bi, bri ) \ -{ \ - (br) = (ar); \ - (bi) = (ai); \ - (bri) = (ar) + (ai); \ -} #endif diff --git a/frame/include/level0/bli_axmys.h b/frame/include/level0/bli_axmys.h deleted file mode 100644 index 7b0410caad..0000000000 --- a/frame/include/level0/bli_axmys.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXMYS_H -#define BLIS_AXMYS_H - -// axmys - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. - - -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dssaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cssaxmys( a, x, y ) bli_saxmyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zssaxmys( a, x, y ) bli_saxmyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ddsaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cdsaxmys( a, x, y ) bli_saxmyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zdsaxmys( a, x, y ) bli_saxmyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_scsaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), 
bli_sreal(y), bli_simag(y) ) -#define bli_dcsaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ccsaxmys( a, x, y ) bli_saxmyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zcsaxmys( a, x, y ) bli_saxmyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_szsaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dzsaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_czsaxmys( a, x, y ) bli_saxmyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zzsaxmys( a, x, y ) bli_saxmyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dsdaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_csdaxmys( a, x, y ) bli_daxmyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zsdaxmys( a, x, y ) bli_daxmyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_sddaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dddaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cddaxmys( a, x, y ) bli_daxmyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zddaxmys( a, x, y ) 
bli_daxmyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_scdaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dcdaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ccdaxmys( a, x, y ) bli_daxmyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zcdaxmys( a, x, y ) bli_daxmyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_szdaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dzdaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_czdaxmys( a, x, y ) bli_daxmyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zzdaxmys( a, x, y ) bli_daxmyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dscaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cscaxmys( a, x, y ) bli_caxmyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zscaxmys( a, x, y ) bli_caxmyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sdcaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_ddcaxmys( a, x, y ) bli_saxmyris( 
bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cdcaxmys( a, x, y ) bli_caxmyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zdcaxmys( a, x, y ) bli_caxmyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sccaxmys( a, x, y ) bli_scaxmyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccaxmys( a, x, y ) bli_scaxmyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccaxmys( a, x, y ) bli_caxmyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccaxmys( a, x, y ) bli_caxmyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szcaxmys( a, x, y ) bli_scaxmyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dzcaxmys( a, x, y ) bli_scaxmyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_czcaxmys( a, x, y ) bli_caxmyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zzcaxmys( a, x, y ) bli_caxmyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dszaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cszaxmys( a, x, y ) bli_zaxmyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zszaxmys( a, x, y ) bli_zaxmyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), 
bli_zreal(y), bli_zimag(y) ) - -#define bli_sdzaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_ddzaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzaxmys( a, x, y ) bli_zaxmyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzaxmys( a, x, y ) bli_zaxmyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sczaxmys( a, x, y ) bli_dzaxmyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dczaxmys( a, x, y ) bli_dzaxmyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cczaxmys( a, x, y ) bli_zaxmyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zczaxmys( a, x, y ) bli_zaxmyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_szzaxmys( a, x, y ) bli_dzaxmyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzaxmys( a, x, y ) bli_dzaxmyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czzaxmys( a, x, y ) bli_zaxmyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzaxmys( a, x, y ) bli_zaxmyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_dscaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_cscaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_zscaxmys( a, x, y ) { (y) -= (a) * (x); } - -#define bli_sdcaxmys( a, 
x, y ) { (y) -= (a) * (x); } -#define bli_ddcaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_cdcaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_zdcaxmys( a, x, y ) { (y) -= (a) * (x); } - -#define bli_sccaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_dccaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_cccaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_zccaxmys( a, x, y ) { (y) -= (a) * (x); } - -#define bli_szcaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_dzcaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_czcaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_zzcaxmys( a, x, y ) { (y) -= (a) * (x); } - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_dszaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_cszaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_zszaxmys( a, x, y ) { (y) -= (a) * (x); } - -#define bli_sdzaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_ddzaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_cdzaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_zdzaxmys( a, x, y ) { (y) -= (a) * (x); } - -#define bli_sczaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_dczaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_cczaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_zczaxmys( a, x, y ) { (y) -= (a) * (x); } - -#define bli_szzaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_dzzaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_czzaxmys( a, x, y ) { (y) -= (a) * (x); } -#define bli_zzzaxmys( a, x, y ) { (y) -= (a) * (x); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_saxmys( a, x, y ) bli_sssaxmys( a, x, y ) -#define bli_daxmys( a, x, y ) bli_dddaxmys( a, x, y ) -#define bli_caxmys( a, x, y ) bli_cccaxmys( a, x, y ) -#define bli_zaxmys( a, x, y ) bli_zzzaxmys( a, x, y ) - - -#endif - diff --git a/frame/include/level0/bli_axpbyjs.h b/frame/include/level0/bli_axpbyjs.h deleted file mode 100644 
index 8f229f1d9e..0000000000 --- a/frame/include/level0/bli_axpbyjs.h +++ /dev/null @@ -1,480 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXPBYJS_H -#define BLIS_AXPBYJS_H - -// axpbyjs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of b. -// - The fourth char encodes the type of y. 
- -// -- (axby) = (???s) ---------------------------------------------------------- - -#define bli_ssssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dsssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_csssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zsssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sdssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ddssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cdssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zdssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_scssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dcssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ccssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zcssaxpbyjs( a, x, b, y ) 
bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_szssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_ssdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dsdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_csdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zsdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sddsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dddsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cddsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define 
bli_zddsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_scdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dcdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ccdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zcdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_szdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_sscsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dscsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cscsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), 
bli_sreal(y), bli_simag(y) ) -#define bli_zscsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sdcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ddcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cdcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zdcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sccsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dccsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cccsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zccsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_szcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), 
bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_sszsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dszsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cszsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zszsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sdzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ddzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cdzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zdzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sczsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dczsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cczsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), 
bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zczsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_szzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) - -// -- (axby) = (???d) ---------------------------------------------------------- - -#define bli_sssdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dssdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cssdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zssdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sdsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ddsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), 
bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cdsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zdsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_scsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dcsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ccsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zcsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_szsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_ssddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dsddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), 
bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_csddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zsddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sdddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ddddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cdddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zdddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_scddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dcddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ccddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zcddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_szddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzddaxpbyjs( a, x, b, y ) 
bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_sscdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dscdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cscdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zscdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sdcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ddcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cdcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zdcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sccdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define 
bli_dccdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cccdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zccdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_szcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_sszdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dszdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cszdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zszdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sdzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), 
bli_dreal(y), bli_dimag(y) ) -#define bli_ddzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cdzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zdzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sczdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dczdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cczdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zczdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_szzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (axby) = (???c) 
---------------------------------------------------------- - -#define bli_ssscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dsscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_csscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zsscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sdscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ddscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cdscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zdscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_scscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dcscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ccscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zcscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), 
bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_szscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_ssdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dsdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_csdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zsdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sddcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dddcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cddcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zddcaxpbyjs( a, x, b, y ) 
bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_scdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dcdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ccdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zcdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_szdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_ssccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dsccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_csccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define 
bli_zsccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sdccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ddccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cdccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zdccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_scccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dcccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ccccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zcccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_szccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), 
bli_creal(y), bli_cimag(y) ) -#define bli_zzccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_sszcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dszcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cszcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zszcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sdzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ddzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cdzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zdzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sczcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dczcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cczcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), 
bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zczcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_szzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) - -// -- (axby) = (???z) ---------------------------------------------------------- - -#define bli_ssszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dsszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_csszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zsszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sdszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ddszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), 
bli_zreal(y), bli_zimag(y) ) -#define bli_cdszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zdszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_scszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dcszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ccszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zcszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_szszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_ssdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dsdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), 
bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_csdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zsdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sddzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dddzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cddzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zddzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_scdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dcdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ccdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zcdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_szdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), 
bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_ssczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dsczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_csczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zsczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sdczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ddczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cdczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zdczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_scczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dcczaxpbyjs( a, x, b, y ) 
bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ccczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zcczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_szczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_sszzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dszzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cszzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zszzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sdzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define 
bli_ddzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sczzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dczzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cczzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zczzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_szzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (axby) = (???c) ---------------------------------------------------------- - -#define 
bli_ssscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_dcscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_ccscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_zcscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_szscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_dzscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_czscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_zzscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } - -#define bli_ssdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_dcdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_ccdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define 
bli_zcdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_szdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_dzdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_czdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_zzdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } - -#define bli_ssccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_dcccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_ccccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_zcccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_szccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_dzccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_czccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_zzccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } - -#define bli_sszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } 
-#define bli_cdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_dczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_cczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_zczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_szzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_dzzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_czzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_zzzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } - -// -- (axby) = (???z) ---------------------------------------------------------- - -#define bli_ssszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_dcszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_ccszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_zcszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_szszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_dzszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_czszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_zzszaxpbyjs( a, x, b, y ) { (y) 
= (a) * conj(x) + (b) * (y); } - -#define bli_ssdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_dcdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_ccdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_zcdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_szdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_dzdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_czdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_zzdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } - -#define bli_ssczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_dcczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_ccczaxpbyjs( a, x, b, y ) { (y) = 
(a) * conjf(x) + (b) * (y); } -#define bli_zcczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_szczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_dzczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_czczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_zzczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } - -#define bli_sszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_dczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_cczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_zczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } -#define bli_szzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_dzzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_czzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } -#define bli_zzzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_saxpbyjs( a, x, b, y ) bli_ssssaxpbyjs( a, x, b, y ) -#define bli_daxpbyjs( a, x, b, y ) bli_ddddaxpbyjs( a, x, b, y ) -#define bli_caxpbyjs( a, x, b, y ) bli_ccccaxpbyjs( a, x, b, y ) -#define bli_zaxpbyjs( a, x, b, y ) bli_zzzzaxpbyjs( a, x, b, y ) - - -#endif - diff --git a/frame/include/level0/bli_axpbys.h 
b/frame/include/level0/bli_axpbys.h deleted file mode 100644 index 4114089320..0000000000 --- a/frame/include/level0/bli_axpbys.h +++ /dev/null @@ -1,480 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXPBYS_H -#define BLIS_AXPBYS_H - -// axpbys - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. 
-// - The third char encodes the type of b. -// - The fourth char encodes the type of y. - -// -- (axby) = (???s) ---------------------------------------------------------- - -#define bli_ssssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dsssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_csssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zsssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sdssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ddssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cdssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zdssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_scssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dcssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ccssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), 
bli_simag(y) ) -#define bli_zcssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_szssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_ssdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dsdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_csdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zsdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sddsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dddsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cddsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), 
bli_sreal(y), bli_simag(y) ) -#define bli_zddsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_scdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dcdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ccdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zcdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_szdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_sscsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dscsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cscsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), 
bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zscsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sdcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ddcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cdcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zdcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sccsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dccsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cccsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zccsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_szcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), 
bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_sszsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dszsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cszsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zszsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sdzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ddzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cdzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zdzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_sczsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dczsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cczsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), 
bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zczsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_szzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) - -// -- (axby) = (???d) ---------------------------------------------------------- - -#define bli_sssdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dssdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cssdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zssdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sdsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ddsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) 
-#define bli_cdsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zdsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_scsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dcsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ccsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zcsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_szsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_ssddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dsddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), 
bli_dimag(y) ) -#define bli_csddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zsddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sdddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ddddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cdddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zdddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_scddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dcddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ccddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zcddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_szddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), 
bli_dreal(y), bli_dimag(y) ) -#define bli_czddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_sscdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dscdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cscdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zscdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sdcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ddcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cdcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zdcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sccdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dccdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), 
bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cccdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zccdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_szcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_sszdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dszdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cszdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zszdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sdzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ddzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), 
bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cdzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zdzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_sczdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dczdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cczdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zczdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_szzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (axby) = (???c) ---------------------------------------------------------- - -#define bli_ssscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), 
bli_cimag(y) ) -#define bli_dsscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_csscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zsscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sdscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ddscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cdscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zdscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_scscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dcscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ccscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zcscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_szscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), 
bli_creal(y), bli_cimag(y) ) -#define bli_dzscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_ssdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dsdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_csdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zsdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sddcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dddcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cddcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zddcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_scdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), 
bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dcdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ccdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zcdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_szdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_ssccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dsccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_csccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zsccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sdccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), 
bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ddccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cdccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zdccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_scccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dcccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ccccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zcccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_szccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_sszcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), 
bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dszcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cszcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zszcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sdzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ddzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cdzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zdzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_sczcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dczcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cczcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zczcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_szzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), 
bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) - -// -- (axby) = (???z) ---------------------------------------------------------- - -#define bli_ssszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dsszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_csszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zsszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sdszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ddszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cdszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zdszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), 
bli_zimag(y) ) -#define bli_scszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dcszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ccszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zcszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_szszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_ssdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dsdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_csdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zsdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), 
bli_zreal(y), bli_zimag(y) ) -#define bli_sddzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dddzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cddzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zddzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_scdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dcdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ccdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zcdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_szdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), 
bli_dimag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_ssczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dsczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_csczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zsczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sdczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ddczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cdczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zdczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_scczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dcczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ccczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zcczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), 
bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_szczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_sszzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dszzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cszzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zszzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sdzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ddzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), 
bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_sczzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dczzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cczzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zczzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_szzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (axby) = (???c) ---------------------------------------------------------- - -#define bli_ssscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdscaxpbys( a, x, b, y ) 
{ (y) = (a) * (x) + (b) * (y); } -#define bli_zdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dcscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ccscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zcscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_szscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dzscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_czscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zzscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } - -#define bli_ssdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dcdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ccdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zcdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_szdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dzdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_czdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zzdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } - -#define bli_ssccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csccaxpbys( a, x, b, y ) { (y) = 
(a) * (x) + (b) * (y); } -#define bli_zsccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dcccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ccccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zcccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_szccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dzccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_czccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zzccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } - -#define bli_sszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_szzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dzzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_czzcaxpbys( a, x, b, y ) { (y) = (a) * (x) 
+ (b) * (y); } -#define bli_zzzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } - -// -- (axby) = (???z) ---------------------------------------------------------- - -#define bli_ssszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dcszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ccszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zcszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_szszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dzszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_czszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zzszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } - -#define bli_ssdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dcdzaxpbys( a, x, b, y ) { (y) = (a) 
* (x) + (b) * (y); } -#define bli_ccdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zcdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_szdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dzdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_czdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zzdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } - -#define bli_ssczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dsczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_csczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zsczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_scczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dcczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ccczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zcczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_szczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dzczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_czczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zzczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } - -#define bli_sszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_ddzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + 
(b) * (y); } -#define bli_cdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_sczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_cczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_szzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_dzzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_czzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } -#define bli_zzzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_saxpbys( a, x, b, y ) bli_ssssaxpbys( a, x, b, y ) -#define bli_daxpbys( a, x, b, y ) bli_ddddaxpbys( a, x, b, y ) -#define bli_caxpbys( a, x, b, y ) bli_ccccaxpbys( a, x, b, y ) -#define bli_zaxpbys( a, x, b, y ) bli_zzzzaxpbys( a, x, b, y ) - - -#endif - diff --git a/frame/include/level0/bli_axpbys_mxn.h b/frame/include/level0/bli_axpbys_mxn.h deleted file mode 100644 index 494c5d4456..0000000000 --- a/frame/include/level0/bli_axpbys_mxn.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXPBYS_MXN_H -#define BLIS_AXPBYS_MXN_H - -// axpbys_mxn - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of b. -// - The fourth char encodes the type of y. -// - We only implement cases where typeof(a) == type(x) && typeof(b) == typeof(y). - -#undef BLIS_ENABLE_CR_CASES -#define BLIS_ENABLE_CR_CASES 0 - -// -- bli_????axpbys_mxn -- - -#undef GENTFUNC2 -#define GENTFUNC2( ctypex, ctypey, chx, chy, opname, kername ) \ -\ -BLIS_INLINE void PASTEMAC(chx,chx,chy,chy,opname) \ - ( \ - const dim_t m, \ - const dim_t n, \ - const ctypex* alpha, \ - const ctypex* x, inc_t rs_x, inc_t cs_x, \ - const ctypey* beta, \ - ctypey* y, inc_t rs_y, inc_t cs_y \ - ) \ -{ \ - /* If beta is zero, overwrite y with alpha*x (in case y has infs or NaNs). 
*/ \ - if ( PASTEMAC(chy,eq0)( *beta ) ) \ - { \ - PASTEMAC(chx,chx,chy,scal2s_mxn)( BLIS_NO_CONJUGATE, m, n, alpha, x, rs_x, cs_x, y, rs_y, cs_y ); \ - return; \ - } \ -\ - if ( BLIS_ENABLE_CR_CASES && rs_x == 1 && rs_y == 1 ) \ - { \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - PASTEMAC(chx,chx,chy,chy,kername) \ - ( \ - *alpha, *(x + ii + jj*cs_x), \ - *beta, *(y + ii + jj*cs_y) \ - ); \ - } \ - else if ( BLIS_ENABLE_CR_CASES && cs_x == 1 && cs_y == 1 ) \ - { \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - PASTEMAC(chx,chx,chy,chy,kername) \ - ( \ - *alpha, *(x + ii*rs_x + jj), \ - *beta, *(y + ii*rs_y + jj) \ - ); \ - } \ - else \ - { \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - PASTEMAC(chx,chx,chy,chy,kername) \ - ( \ - *alpha, *(x + ii*rs_x + jj*cs_x), \ - *beta, *(y + ii*rs_y + jj*cs_y) \ - ); \ - } \ -} - -INSERT_GENTFUNC2_BASIC ( axpbys_mxn, axpbys ) -INSERT_GENTFUNC2_MIX_DP( axpbys_mxn, axpbys ) - - -// -- bli_?axpbys_mxn -- - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname ) \ -\ -BLIS_INLINE void PASTEMAC(ch,opname) \ - ( \ - const dim_t m, \ - const dim_t n, \ - const ctype* alpha, \ - const ctype* x, inc_t rs_x, inc_t cs_x, \ - const ctype* beta, \ - ctype* y, inc_t rs_y, inc_t cs_y \ - ) \ -{ \ - PASTEMAC(ch,ch,ch,ch,opname)( m, n, alpha, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} - -INSERT_GENTFUNC_BASIC( axpbys_mxn ) - - -#endif diff --git a/frame/include/level0/bli_axpyjs.h b/frame/include/level0/bli_axpyjs.h deleted file mode 100644 index 4b2c640a48..0000000000 --- a/frame/include/level0/bli_axpyjs.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXPYJS_H -#define BLIS_AXPYJS_H - -// axpyjs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. 
- - -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dssaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cssaxpyjs( a, x, y ) bli_saxpyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zssaxpyjs( a, x, y ) bli_saxpyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ddsaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cdsaxpyjs( a, x, y ) bli_saxpyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zdsaxpyjs( a, x, y ) bli_saxpyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_scsaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dcsaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ccsaxpyjs( a, x, y ) bli_saxpyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zcsaxpyjs( a, x, y ) bli_saxpyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_szsaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dzsaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_czsaxpyjs( a, x, 
y ) bli_saxpyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zzsaxpyjs( a, x, y ) bli_saxpyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dsdaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_csdaxpyjs( a, x, y ) bli_daxpyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zsdaxpyjs( a, x, y ) bli_daxpyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_sddaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dddaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cddaxpyjs( a, x, y ) bli_daxpyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zddaxpyjs( a, x, y ) bli_daxpyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_scdaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dcdaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ccdaxpyjs( a, x, y ) bli_daxpyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zcdaxpyjs( a, x, y ) bli_daxpyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_szdaxpyjs( a, x, y ) bli_daxpyjris( 
bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dzdaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_czdaxpyjs( a, x, y ) bli_daxpyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zzdaxpyjs( a, x, y ) bli_daxpyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dscaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cscaxpyjs( a, x, y ) bli_caxpyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zscaxpyjs( a, x, y ) bli_caxpyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sdcaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_ddcaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cdcaxpyjs( a, x, y ) bli_caxpyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zdcaxpyjs( a, x, y ) bli_caxpyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sccaxpyjs( a, x, y ) bli_scaxpyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccaxpyjs( a, x, y ) bli_scaxpyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccaxpyjs( a, x, y ) 
bli_caxpyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccaxpyjs( a, x, y ) bli_caxpyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szcaxpyjs( a, x, y ) bli_scaxpyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dzcaxpyjs( a, x, y ) bli_scaxpyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_czcaxpyjs( a, x, y ) bli_caxpyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zzcaxpyjs( a, x, y ) bli_caxpyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dszaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cszaxpyjs( a, x, y ) bli_zaxpyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zszaxpyjs( a, x, y ) bli_zaxpyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sdzaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_ddzaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzaxpyjs( a, x, y ) bli_zaxpyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzaxpyjs( a, x, y ) bli_zaxpyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sczaxpyjs( a, x, y ) bli_dzaxpyjris( bli_sreal(a), 
bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dczaxpyjs( a, x, y ) bli_dzaxpyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cczaxpyjs( a, x, y ) bli_zaxpyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zczaxpyjs( a, x, y ) bli_zaxpyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_szzaxpyjs( a, x, y ) bli_dzaxpyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzaxpyjs( a, x, y ) bli_dzaxpyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czzaxpyjs( a, x, y ) bli_zaxpyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzaxpyjs( a, x, y ) bli_zaxpyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_dscaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_cscaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_zscaxpyjs( a, x, y ) { (y) += (a) * (x); } - -#define bli_sdcaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_ddcaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_cdcaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_zdcaxpyjs( a, x, y ) { (y) += (a) * (x); } - -#define bli_sccaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } -#define bli_dccaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } -#define bli_cccaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } -#define bli_zccaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } - -#define bli_szcaxpyjs( a, x, y ) { (y) += (a) * conj(x); } -#define bli_dzcaxpyjs( a, x, y ) { (y) += (a) * conj(x); } -#define bli_czcaxpyjs( a, x, y ) { (y) 
+= (a) * conj(x); } -#define bli_zzcaxpyjs( a, x, y ) { (y) += (a) * conj(x); } - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_dszaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_cszaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_zszaxpyjs( a, x, y ) { (y) += (a) * (x); } - -#define bli_sdzaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_ddzaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_cdzaxpyjs( a, x, y ) { (y) += (a) * (x); } -#define bli_zdzaxpyjs( a, x, y ) { (y) += (a) * (x); } - -#define bli_sczaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } -#define bli_dczaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } -#define bli_cczaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } -#define bli_zczaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } - -#define bli_szzaxpyjs( a, x, y ) { (y) += (a) * conj(x); } -#define bli_dzzaxpyjs( a, x, y ) { (y) += (a) * conj(x); } -#define bli_czzaxpyjs( a, x, y ) { (y) += (a) * conj(x); } -#define bli_zzzaxpyjs( a, x, y ) { (y) += (a) * conj(x); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_saxpyjs( a, x, y ) bli_sssaxpyjs( a, x, y ) -#define bli_daxpyjs( a, x, y ) bli_dddaxpyjs( a, x, y ) -#define bli_caxpyjs( a, x, y ) bli_cccaxpyjs( a, x, y ) -#define bli_zaxpyjs( a, x, y ) bli_zzzaxpyjs( a, x, y ) - - -#endif - diff --git a/frame/include/level0/bli_axpys.h b/frame/include/level0/bli_axpys.h deleted file mode 100644 index fb6871b4bf..0000000000 --- a/frame/include/level0/bli_axpys.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXPYS_H -#define BLIS_AXPYS_H - -// axpys - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. 
- - -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dssaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cssaxpys( a, x, y ) bli_saxpyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zssaxpys( a, x, y ) bli_saxpyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ddsaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cdsaxpys( a, x, y ) bli_saxpyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zdsaxpys( a, x, y ) bli_saxpyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_scsaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dcsaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ccsaxpys( a, x, y ) bli_saxpyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zcsaxpys( a, x, y ) bli_saxpyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_szsaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dzsaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_czsaxpys( a, x, y ) bli_saxpyris( 
bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zzsaxpys( a, x, y ) bli_saxpyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dsdaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_csdaxpys( a, x, y ) bli_daxpyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zsdaxpys( a, x, y ) bli_daxpyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_sddaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dddaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cddaxpys( a, x, y ) bli_daxpyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zddaxpys( a, x, y ) bli_daxpyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_scdaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dcdaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ccdaxpys( a, x, y ) bli_daxpyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zcdaxpys( a, x, y ) bli_daxpyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_szdaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), 
bli_dreal(y), bli_dimag(y) ) -#define bli_dzdaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_czdaxpys( a, x, y ) bli_daxpyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zzdaxpys( a, x, y ) bli_daxpyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dscaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cscaxpys( a, x, y ) bli_caxpyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zscaxpys( a, x, y ) bli_caxpyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sdcaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_ddcaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cdcaxpys( a, x, y ) bli_caxpyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zdcaxpys( a, x, y ) bli_caxpyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sccaxpys( a, x, y ) bli_scaxpyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccaxpys( a, x, y ) bli_scaxpyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccaxpys( a, x, y ) bli_caxpyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), 
bli_cimag(y) ) -#define bli_zccaxpys( a, x, y ) bli_caxpyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szcaxpys( a, x, y ) bli_scaxpyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dzcaxpys( a, x, y ) bli_scaxpyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_czcaxpys( a, x, y ) bli_caxpyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zzcaxpys( a, x, y ) bli_caxpyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dszaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cszaxpys( a, x, y ) bli_zaxpyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zszaxpys( a, x, y ) bli_zaxpyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sdzaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_ddzaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzaxpys( a, x, y ) bli_zaxpyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzaxpys( a, x, y ) bli_zaxpyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sczaxpys( a, x, y ) bli_dzaxpyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dczaxpys( a, x, y ) 
bli_dzaxpyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cczaxpys( a, x, y ) bli_zaxpyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zczaxpys( a, x, y ) bli_zaxpyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_szzaxpys( a, x, y ) bli_dzaxpyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzaxpys( a, x, y ) bli_dzaxpyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czzaxpys( a, x, y ) bli_zaxpyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzaxpys( a, x, y ) bli_zaxpyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_dscaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_cscaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_zscaxpys( a, x, y ) { (y) += (a) * (x); } - -#define bli_sdcaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_ddcaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_cdcaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_zdcaxpys( a, x, y ) { (y) += (a) * (x); } - -#define bli_sccaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_dccaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_cccaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_zccaxpys( a, x, y ) { (y) += (a) * (x); } - -#define bli_szcaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_dzcaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_czcaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_zzcaxpys( a, x, y ) { (y) += (a) * (x); } - -// -- (axy) = (??z) ------------------------------------------------------------ - 
-#define bli_sszaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_dszaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_cszaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_zszaxpys( a, x, y ) { (y) += (a) * (x); } - -#define bli_sdzaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_ddzaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_cdzaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_zdzaxpys( a, x, y ) { (y) += (a) * (x); } - -#define bli_sczaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_dczaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_cczaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_zczaxpys( a, x, y ) { (y) += (a) * (x); } - -#define bli_szzaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_dzzaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_czzaxpys( a, x, y ) { (y) += (a) * (x); } -#define bli_zzzaxpys( a, x, y ) { (y) += (a) * (x); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_saxpys( a, x, y ) bli_sssaxpys( a, x, y ) -#define bli_daxpys( a, x, y ) bli_dddaxpys( a, x, y ) -#define bli_caxpys( a, x, y ) bli_cccaxpys( a, x, y ) -#define bli_zaxpys( a, x, y ) bli_zzzaxpys( a, x, y ) - - -#endif - diff --git a/frame/include/level0/bli_lt.h b/frame/include/level0/bli_complex_terms.h similarity index 60% rename from frame/include/level0/bli_lt.h rename to frame/include/level0/bli_complex_terms.h index b7c68ddaa6..0cf05c30c5 100644 --- a/frame/include/level0/bli_lt.h +++ b/frame/include/level0/bli_complex_terms.h @@ -32,39 +32,40 @@ */ -#ifndef BLIS_LT_H -#define BLIS_LT_H - - -// lt (passed by value) - -#define bli_slt( a, b ) ( (a) < (b) ) -#define bli_dlt( a, b ) ( (a) < (b) ) -#define bli_clt( a, b ) ( bli_creal(a) < bli_creal(b) ) -#define bli_zlt( a, b ) ( bli_zreal(a) < bli_zreal(b) ) -#define bli_ilt( a, b ) ( (a) < (b) ) - -// lt0 - -#define bli_slt0( a ) ( (a) < 0.0F ) -#define bli_dlt0( a ) ( (a) < 0.0 ) -#define bli_clt0( a ) ( bli_creal(a) < 0.0F ) -#define bli_zlt0( a ) ( bli_zreal(a) < 0.0 ) - -// gt (passed 
by value) - -#define bli_sgt( a, b ) ( (a) > (b) ) -#define bli_dgt( a, b ) ( (a) > (b) ) -#define bli_cgt( a, b ) ( bli_creal(a) > bli_creal(b) ) -#define bli_zgt( a, b ) ( bli_zreal(a) > bli_zreal(b) ) -#define bli_igt( a, b ) ( (a) > (b) ) - -// gt0 - -#define bli_sgt0( a ) ( (a) > 0.0F ) -#define bli_dgt0( a ) ( (a) > 0.0 ) -#define bli_cgt0( a ) ( bli_creal(a) > 0.0F ) -#define bli_zgt0( a ) ( bli_zreal(a) > 0.0 ) +#ifndef BLIS_COMPLEX_TERMS_H +#define BLIS_COMPLEX_TERMS_H + + +// -- Complex term-zeroing macros ---------------------------------------------- + +// Note: +// - pab is the precision of the A_[ri] * B_[ri] product. It is only used in +// certain cases where we need to decide which precision of zero to substitute +// into the calling expression. + +// ar * br term +#define bli_rrtermrr( pab, ab ) ab +#define bli_rctermrr( pab, ab ) ab +#define bli_crtermrr( pab, ab ) ab +#define bli_cctermrr( pab, ab ) ab + +// ai * bi term +#define bli_rrtermii( pab, ab ) PASTEMAC(pab,zero) +#define bli_rctermii( pab, ab ) PASTEMAC(pab,zero) +#define bli_crtermii( pab, ab ) PASTEMAC(pab,zero) +#define bli_cctermii( pab, ab ) ab + +// ai * br term +#define bli_rrtermir( pab, ab ) PASTEMAC(pab,zero) +#define bli_rctermir( pab, ab ) PASTEMAC(pab,zero) +#define bli_crtermir( pab, ab ) ab +#define bli_cctermir( pab, ab ) ab + +// ar * bi term +#define bli_rrtermri( pab, ab ) PASTEMAC(pab,zero) +#define bli_rctermri( pab, ab ) ab +#define bli_crtermri( pab, ab ) PASTEMAC(pab,zero) +#define bli_cctermri( pab, ab ) ab diff --git a/frame/include/level0/bli_conjs.h b/frame/include/level0/bli_conjs.h deleted file mode 100644 index 241148825f..0000000000 --- a/frame/include/level0/bli_conjs.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_CONJS_H -#define BLIS_CONJS_H - -// conjs - -#define bli_sconjs( x ) bli_sconjris( bli_sreal(x), bli_simag(x) ) -#define bli_dconjs( x ) bli_dconjris( bli_dreal(x), bli_dimag(x) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_cconjs( x ) bli_cconjris( bli_creal(x), bli_cimag(x) ) -#define bli_zconjs( x ) bli_zconjris( bli_zreal(x), bli_zimag(x) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_cconjs( x ) { (x) = conjf(x); } -#define bli_zconjs( x ) { (x) = conj (x); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#endif - diff --git a/frame/include/level0/bli_copycjs.h b/frame/include/level0/bli_copycjs.h deleted file mode 100644 index f6056ad1ec..0000000000 --- a/frame/include/level0/bli_copycjs.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYCJS_H -#define BLIS_COPYCJS_H - -// copycjs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - -#define bli_sscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ddcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_sccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_creal(x), 
bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_sccopycjs( conjx, x, y ) { (y) = (x); } -#define bli_dccopycjs( conjx, x, y ) { (y) = (x); } -#define bli_cccopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conjf(x) : (x) ); } -#define bli_zccopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conj (x) : (x) ); } - -#define bli_szcopycjs( conjx, x, y ) { (y) = (x); } -#define bli_dzcopycjs( conjx, x, y ) { (y) = (x); } -#define bli_czcopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conjf(x) : (x) ); } -#define bli_zzcopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? 
conj (x) : (x) ); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_iicopycjs( conjx, x, y ) { (y) = ( gint_t ) (x); } - - -#define bli_scopycjs( conjx, x, y ) bli_sscopycjs( conjx, x, y ) -#define bli_dcopycjs( conjx, x, y ) bli_ddcopycjs( conjx, x, y ) -#define bli_ccopycjs( conjx, x, y ) bli_cccopycjs( conjx, x, y ) -#define bli_zcopycjs( conjx, x, y ) bli_zzcopycjs( conjx, x, y ) -#define bli_icopycjs( conjx, x, y ) bli_iicopycjs( conjx, x, y ) - - -#endif - diff --git a/frame/include/level0/bli_copyjnzs.h b/frame/include/level0/bli_copyjnzs.h deleted file mode 100644 index 048525a180..0000000000 --- a/frame/include/level0/bli_copyjnzs.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYJNZS_H -#define BLIS_COPYJNZS_H - -// copyjnzs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - -#define bli_sscopyjnzs( x, y ) bli_scopyjris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dscopyjnzs( x, y ) bli_scopyjris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cscopyjnzs( x, y ) bli_scopyjris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zscopyjnzs( x, y ) bli_scopyjris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdcopyjnzs( x, y ) bli_dcopyjris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ddcopyjnzs( x, y ) bli_dcopyjris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cdcopyjnzs( x, y ) bli_dcopyjris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zdcopyjnzs( x, y ) bli_dcopyjris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -// NOTE: Use of scopyjris() (implemented in terms of scopyris()), is so we -// don't touch the imaginary part of y. 
-#define bli_sccopyjnzs( x, y ) bli_scopyjris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccopyjnzs( x, y ) bli_scopyjris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccopyjnzs( x, y ) bli_ccopyjris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccopyjnzs( x, y ) bli_ccopyjris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -// NOTE: Use of dcopyjris() (implemented in terms of dcopyris()), is so we -// don't touch the imaginary part of y. -#define bli_szcopyjnzs( x, y ) bli_dcopyjris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzcopyjnzs( x, y ) bli_dcopyjris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czcopyjnzs( x, y ) bli_zcopyjris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzcopyjnzs( x, y ) bli_zcopyjris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - - -#define bli_iicopyjnzs( x, y ) { (y) = ( gint_t ) (x); } - - -#define bli_scopyjnzs( x, y ) bli_sscopyjnzs( x, y ) -#define bli_dcopyjnzs( x, y ) bli_ddcopyjnzs( x, y ) -#define bli_ccopyjnzs( x, y ) bli_cccopyjnzs( x, y ) -#define bli_zcopyjnzs( x, y ) bli_zzcopyjnzs( x, y ) -#define bli_icopyjnzs( x, y ) bli_iicopyjnzs( x, y ) - - -#endif - diff --git a/frame/include/level0/bli_copyjs.h b/frame/include/level0/bli_copyjs.h deleted file mode 100644 index 7292e82fb8..0000000000 --- a/frame/include/level0/bli_copyjs.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYJS_H -#define BLIS_COPYJS_H - -// copyjs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- -#define bli_sscopyjs( x, y ) bli_scopyjris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dscopyjs( x, y ) bli_scopyjris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cscopyjs( x, y ) bli_scopyjris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zscopyjs( x, y ) bli_scopyjris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdcopyjs( x, y ) bli_dcopyjris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ddcopyjs( x, y ) bli_dcopyjris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cdcopyjs( x, y ) bli_dcopyjris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zdcopyjs( x, y ) bli_dcopyjris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_sccopyjs( x, y ) bli_ccopyjris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccopyjs( x, y ) bli_ccopyjris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccopyjs( x, y ) bli_ccopyjris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccopyjs( x, y ) bli_ccopyjris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szcopyjs( x, y ) bli_zcopyjris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzcopyjs( x, y ) bli_zcopyjris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czcopyjs( x, y ) bli_zcopyjris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzcopyjs( x, y ) bli_zcopyjris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_sccopyjs( x, y ) { (y) = (x); } -#define bli_dccopyjs( x, y ) { (y) = (x); } -#define bli_cccopyjs( x, y ) { (y) = conjf(x); } -#define bli_zccopyjs( x, y ) { (y) = conj (x); } - -#define bli_szcopyjs( x, y ) { (y) = (x); } -#define bli_dzcopyjs( x, y ) { (y) = (x); 
} -#define bli_czcopyjs( x, y ) { (y) = conjf(x); } -#define bli_zzcopyjs( x, y ) { (y) = conj (x); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_iicopyjs( x, y ) { (y) = ( gint_t ) (x); } - - -#define bli_scopyjs( x, y ) bli_sscopyjs( x, y ) -#define bli_dcopyjs( x, y ) bli_ddcopyjs( x, y ) -#define bli_ccopyjs( x, y ) bli_cccopyjs( x, y ) -#define bli_zcopyjs( x, y ) bli_zzcopyjs( x, y ) -#define bli_icopyjs( x, y ) bli_iicopyjs( x, y ) - - -#endif - diff --git a/frame/include/level0/bli_copynzs.h b/frame/include/level0/bli_copynzs.h deleted file mode 100644 index aa5d786514..0000000000 --- a/frame/include/level0/bli_copynzs.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYNZS_H -#define BLIS_COPYNZS_H - -// copynzs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - -#define bli_sscopynzs( x, y ) bli_scopyris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dscopynzs( x, y ) bli_scopyris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cscopynzs( x, y ) bli_scopyris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zscopynzs( x, y ) bli_scopyris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdcopynzs( x, y ) bli_dcopyris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ddcopynzs( x, y ) bli_dcopyris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cdcopynzs( x, y ) bli_dcopyris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zdcopynzs( x, y ) bli_dcopyris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -// NOTE: Use of scopyris() is so we don't touch the imaginary part of y. 
-#define bli_sccopynzs( x, y ) bli_scopyris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccopynzs( x, y ) bli_scopyris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccopynzs( x, y ) bli_ccopyris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccopynzs( x, y ) bli_ccopyris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -// NOTE: Use of dcopyris() is so we don't touch the imaginary part of y. -#define bli_szcopynzs( x, y ) bli_dcopyris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzcopynzs( x, y ) bli_dcopyris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czcopynzs( x, y ) bli_zcopyris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzcopynzs( x, y ) bli_zcopyris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - - -#define bli_iicopynzs( x, y ) { (y) = ( gint_t ) (x); } - - -#define bli_scopynzs( x, y ) bli_sscopynzs( x, y ) -#define bli_dcopynzs( x, y ) bli_ddcopynzs( x, y ) -#define bli_ccopynzs( x, y ) bli_cccopynzs( x, y ) -#define bli_zcopynzs( x, y ) bli_zzcopynzs( x, y ) -#define bli_icopynzs( x, y ) bli_iicopynzs( x, y ) - - -#endif - diff --git a/frame/include/level0/bli_copys.h b/frame/include/level0/bli_copys.h deleted file mode 100644 index fee51f4dc6..0000000000 --- a/frame/include/level0/bli_copys.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYS_H -#define BLIS_COPYS_H - -// copys - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- -#define bli_sscopys( x, y ) bli_scopyris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dscopys( x, y ) bli_scopyris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cscopys( x, y ) bli_scopyris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zscopys( x, y ) bli_scopyris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdcopys( x, y ) bli_dcopyris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ddcopys( x, y ) bli_dcopyris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cdcopys( x, y ) bli_dcopyris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zdcopys( x, y ) bli_dcopyris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -// NOTE: Use of ccopyris() means the imaginary part of y will be overwritten with zero. -#define bli_sccopys( x, y ) bli_ccopyris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccopys( x, y ) bli_ccopyris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccopys( x, y ) bli_ccopyris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccopys( x, y ) bli_ccopyris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -// NOTE: Use of zcopyris() means the imaginary part of y will be overwritten with zero. 
-#define bli_szcopys( x, y ) bli_zcopyris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzcopys( x, y ) bli_zcopyris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czcopys( x, y ) bli_zcopyris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzcopys( x, y ) bli_zcopyris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - - -#define bli_iicopys( x, y ) { (y) = ( gint_t ) (x); } - - -#define bli_scopys( x, y ) bli_sscopys( x, y ) -#define bli_dcopys( x, y ) bli_ddcopys( x, y ) -#define bli_ccopys( x, y ) bli_cccopys( x, y ) -#define bli_zcopys( x, y ) bli_zzcopys( x, y ) -#define bli_icopys( x, y ) bli_iicopys( x, y ) - - -#endif - diff --git a/frame/include/level0/bli_copys_mxn.h b/frame/include/level0/bli_copys_mxn.h deleted file mode 100644 index 4b729376a2..0000000000 --- a/frame/include/level0/bli_copys_mxn.h +++ /dev/null @@ -1,676 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYS_MXN_H -#define BLIS_COPYS_MXN_H - -// copys_mxn - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - -#undef BLIS_ENABLE_CR_CASES -#define BLIS_ENABLE_CR_CASES 0 - -// -- bli_??copys_mxn -- - -#undef GENTFUNC2 -#define GENTFUNC2( ctypex, ctypey, chx, chy, opname, kername ) \ -\ -BLIS_INLINE void PASTEMAC(chx,chy,opname) \ - ( \ - const dim_t m, \ - const dim_t n, \ - const ctypex* x, inc_t rs_x, inc_t cs_x, \ - ctypey* y, inc_t rs_y, inc_t cs_y \ - ) \ -{ \ - if ( BLIS_ENABLE_CR_CASES && rs_x == 1 && rs_y == 1 ) \ - { \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - PASTEMAC(chx,chy,kername)( *(x + ii + jj*cs_x), \ - *(y + ii + jj*cs_y) ); \ - } \ - else if ( BLIS_ENABLE_CR_CASES && cs_x == 1 && cs_y == 1 ) \ - { \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - PASTEMAC(chx,chy,kername)( *(x + ii*rs_x + jj), \ - *(y + ii*rs_y + jj) ); \ - } \ - else \ - { \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - PASTEMAC(chx,chy,kername)( *(x + ii*rs_x + jj*cs_x), \ - *(y + ii*rs_y + jj*cs_y) ); \ - } \ -} - -INSERT_GENTFUNC2_BASIC ( 
copys_mxn, copys ) -INSERT_GENTFUNC2_MIX_DP( copys_mxn, copys ) - - -// -- bli_?copys_mxn -- - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname ) \ -\ -BLIS_INLINE void PASTEMAC(ch,opname) \ - ( \ - const dim_t m, \ - const dim_t n, \ - const ctype* x, inc_t rs_x, inc_t cs_x, \ - ctype* y, inc_t rs_y, inc_t cs_y \ - ) \ -{ \ - PASTEMAC(ch,ch,opname)( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} - -INSERT_GENTFUNC_BASIC( copys_mxn ) - - - - -#if 0 - -// xy = ?s - -BLIS_INLINE void bli_sscopys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sscopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_sscopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sscopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dscopys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dscopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dscopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dscopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void 
bli_cscopys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cscopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_cscopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cscopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zscopys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zscopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zscopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zscopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// xy = ?d - -BLIS_INLINE void bli_sdcopys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sdcopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( 
dim_t jj = 0; jj < n; ++jj ) - bli_sdcopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sdcopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_ddcopys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_ddcopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_ddcopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_ddcopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_cdcopys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cdcopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_cdcopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cdcopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zdcopys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef 
BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zdcopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zdcopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zdcopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// xy = ?c - -BLIS_INLINE void bli_sccopys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sccopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_sccopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sccopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dccopys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dccopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dccopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - 
bli_dccopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_cccopys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cccopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_cccopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cccopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zccopys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zccopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zccopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zccopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// xy = ?c - -BLIS_INLINE void bli_szcopys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_szcopys( *(x + ii + jj*cs_x), - *(y + ii + 
jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_szcopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_szcopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dzcopys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dzcopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dzcopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dzcopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_czcopys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_czcopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_czcopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_czcopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zzcopys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t 
rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zzcopys( *(x + ii + jj*cs_x), - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zzcopys( *(x + ii*rs_x + jj), - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zzcopys( *(x + ii*rs_x + jj*cs_x), - *(y + ii*rs_y + jj*cs_y) ); - } -} - -BLIS_INLINE void bli_scopys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - bli_sscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); -} -BLIS_INLINE void bli_dcopys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - bli_ddcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); -} -BLIS_INLINE void bli_ccopys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - bli_cccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); -} -BLIS_INLINE void bli_zcopys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - bli_zzcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); -} -#endif - -#endif diff --git a/frame/include/level0/bli_inverts.h b/frame/include/level0/bli_declinits.h similarity index 64% rename from frame/include/level0/bli_inverts.h rename to frame/include/level0/bli_declinits.h index 092e5ab4ef..0461cd1cd5 100644 --- a/frame/include/level0/bli_inverts.h +++ 
b/frame/include/level0/bli_declinits.h @@ -32,28 +32,26 @@ */ -#ifndef BLIS_INVERTS_H -#define BLIS_INVERTS_H +#ifndef BLIS_DECLINITS_H +#define BLIS_DECLINITS_H -// inverts +// declinits // Notes: -// - The first char encodes the type of x. - -#define bli_sinverts( x ) bli_sinvertris( bli_sreal(x), bli_simag(x) ) -#define bli_dinverts( x ) bli_dinvertris( bli_dreal(x), bli_dimag(x) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_cinverts( x ) bli_cinvertris( bli_creal(x), bli_cimag(x) ) -#define bli_zinverts( x ) bli_zinvertris( bli_zreal(x), bli_zimag(x) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_cinverts( x ) { (x) = 1.0F / (x); } -#define bli_zinverts( x ) { (x) = 1.0 / (x); } - -#endif // BLIS_ENABLE_C99_COMPLEX +// - The first char encodes the domain of output yr + yi. +// - The pxy precision character encodes the precision of x AND y (they +// are assumed to be the same). +// - These macros are used to declare AND initialize variables corresponding +// to the real and imaginary parts of (presumably) temporary variables. +// If the domain is real, only the real part is declared and initialized. + +#define bli_rdeclinits( pxy, xr, xi, yr, yi ) PASTEMAC(pxy,ctype) yr = xr; (void)yr; +#define bli_cdeclinits( pxy, xr, xi, yr, yi ) PASTEMAC(pxy,ctype) yr = xr; (void)yr; \ + PASTEMAC(pxy,ctype) yi = xi; (void)yi; + +// An extra definition for situations where we only need a real value declared +// and initialized (e.g. when explicitly implementing in the complex domain). +#define bli_rodeclinits( pxy, xr, yr ) bli_rdeclinits( pxy, xr, /*xi*/, yr, /*yi*/ ) #endif diff --git a/frame/include/level0/bli_dotjs.h b/frame/include/level0/bli_dotjs.h deleted file mode 100644 index e03f0e2a78..0000000000 --- a/frame/include/level0/bli_dotjs.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_DOTJS_H -#define BLIS_DOTJS_H - -// dotjs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. -// - The third char encodes the type of rho. -// - x is used in conjugated form. 
- - -#define bli_sssdotjs( x, y, a ) bli_sssaxpyjs( y, x, a ) -#define bli_dssdotjs( x, y, a ) bli_sdsaxpyjs( y, x, a ) -#define bli_cssdotjs( x, y, a ) bli_scsaxpyjs( y, x, a ) -#define bli_zssdotjs( x, y, a ) bli_szsaxpyjs( y, x, a ) - -#define bli_sdsdotjs( x, y, a ) bli_dssaxpyjs( y, x, a ) -#define bli_ddsdotjs( x, y, a ) bli_ddsaxpyjs( y, x, a ) -#define bli_cdsdotjs( x, y, a ) bli_dcsaxpyjs( y, x, a ) -#define bli_zdsdotjs( x, y, a ) bli_dzsaxpyjs( y, x, a ) - -#define bli_scsdotjs( x, y, a ) bli_cssaxpyjs( y, x, a ) -#define bli_dcsdotjs( x, y, a ) bli_cdsaxpyjs( y, x, a ) -#define bli_ccsdotjs( x, y, a ) bli_ccsaxpyjs( y, x, a ) -#define bli_zcsdotjs( x, y, a ) bli_czsaxpyjs( y, x, a ) - -#define bli_szsdotjs( x, y, a ) bli_zssaxpyjs( y, x, a ) -#define bli_dzsdotjs( x, y, a ) bli_zdsaxpyjs( y, x, a ) -#define bli_czsdotjs( x, y, a ) bli_zcsaxpyjs( y, x, a ) -#define bli_zzsdotjs( x, y, a ) bli_zzsaxpyjs( y, x, a ) - - -#define bli_ssddotjs( x, y, a ) bli_ssdaxpyjs( y, x, a ) -#define bli_dsddotjs( x, y, a ) bli_sddaxpyjs( y, x, a ) -#define bli_csddotjs( x, y, a ) bli_scdaxpyjs( y, x, a ) -#define bli_zsddotjs( x, y, a ) bli_szdaxpyjs( y, x, a ) - -#define bli_sdddotjs( x, y, a ) bli_dsdaxpyjs( y, x, a ) -#define bli_ddddotjs( x, y, a ) bli_dddaxpyjs( y, x, a ) -#define bli_cdddotjs( x, y, a ) bli_dcdaxpyjs( y, x, a ) -#define bli_zdddotjs( x, y, a ) bli_dzdaxpyjs( y, x, a ) - -#define bli_scddotjs( x, y, a ) bli_csdaxpyjs( y, x, a ) -#define bli_dcddotjs( x, y, a ) bli_cddaxpyjs( y, x, a ) -#define bli_ccddotjs( x, y, a ) bli_ccdaxpyjs( y, x, a ) -#define bli_zcddotjs( x, y, a ) bli_czdaxpyjs( y, x, a ) - -#define bli_szddotjs( x, y, a ) bli_zsdaxpyjs( y, x, a ) -#define bli_dzddotjs( x, y, a ) bli_zddaxpyjs( y, x, a ) -#define bli_czddotjs( x, y, a ) bli_zcdaxpyjs( y, x, a ) -#define bli_zzddotjs( x, y, a ) bli_zzdaxpyjs( y, x, a ) - - -#define bli_sscdotjs( x, y, a ) bli_sscaxpyjs( y, x, a ) -#define bli_dscdotjs( x, y, a ) bli_sdcaxpyjs( y, x, a ) 
-#define bli_cscdotjs( x, y, a ) bli_sccaxpyjs( y, x, a ) -#define bli_zscdotjs( x, y, a ) bli_szcaxpyjs( y, x, a ) - -#define bli_sdcdotjs( x, y, a ) bli_dscaxpyjs( y, x, a ) -#define bli_ddcdotjs( x, y, a ) bli_ddcaxpyjs( y, x, a ) -#define bli_cdcdotjs( x, y, a ) bli_dccaxpyjs( y, x, a ) -#define bli_zdcdotjs( x, y, a ) bli_dzcaxpyjs( y, x, a ) - -#define bli_sccdotjs( x, y, a ) bli_cscaxpyjs( y, x, a ) -#define bli_dccdotjs( x, y, a ) bli_cdcaxpyjs( y, x, a ) -#define bli_cccdotjs( x, y, a ) bli_cccaxpyjs( y, x, a ) -#define bli_zccdotjs( x, y, a ) bli_czcaxpyjs( y, x, a ) - -#define bli_szcdotjs( x, y, a ) bli_zscaxpyjs( y, x, a ) -#define bli_dzcdotjs( x, y, a ) bli_zdcaxpyjs( y, x, a ) -#define bli_czcdotjs( x, y, a ) bli_zccaxpyjs( y, x, a ) -#define bli_zzcdotjs( x, y, a ) bli_zzcaxpyjs( y, x, a ) - - -#define bli_sszdotjs( x, y, a ) bli_sszaxpyjs( y, x, a ) -#define bli_dszdotjs( x, y, a ) bli_sdzaxpyjs( y, x, a ) -#define bli_cszdotjs( x, y, a ) bli_sczaxpyjs( y, x, a ) -#define bli_zszdotjs( x, y, a ) bli_szzaxpyjs( y, x, a ) - -#define bli_sdzdotjs( x, y, a ) bli_dszaxpyjs( y, x, a ) -#define bli_ddzdotjs( x, y, a ) bli_ddzaxpyjs( y, x, a ) -#define bli_cdzdotjs( x, y, a ) bli_dczaxpyjs( y, x, a ) -#define bli_zdzdotjs( x, y, a ) bli_dzzaxpyjs( y, x, a ) - -#define bli_sczdotjs( x, y, a ) bli_cszaxpyjs( y, x, a ) -#define bli_dczdotjs( x, y, a ) bli_cdzaxpyjs( y, x, a ) -#define bli_cczdotjs( x, y, a ) bli_cczaxpyjs( y, x, a ) -#define bli_zczdotjs( x, y, a ) bli_czzaxpyjs( y, x, a ) - -#define bli_szzdotjs( x, y, a ) bli_zszaxpyjs( y, x, a ) -#define bli_dzzdotjs( x, y, a ) bli_zdzaxpyjs( y, x, a ) -#define bli_czzdotjs( x, y, a ) bli_zczaxpyjs( y, x, a ) -#define bli_zzzdotjs( x, y, a ) bli_zzzaxpyjs( y, x, a ) - - - - - -#define bli_sdotjs( x, y, a ) bli_sssdotjs( x, y, a ) -#define bli_ddotjs( x, y, a ) bli_ddddotjs( x, y, a ) -#define bli_cdotjs( x, y, a ) bli_cccdotjs( x, y, a ) -#define bli_zdotjs( x, y, a ) bli_zzzdotjs( x, y, a ) - - -#endif - 
diff --git a/frame/include/level0/bli_dots.h b/frame/include/level0/bli_dots.h deleted file mode 100644 index f565ba529c..0000000000 --- a/frame/include/level0/bli_dots.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_DOTS_H -#define BLIS_DOTS_H - -// dots - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
-// - The third char encodes the type of rho. - - -#define bli_sssdots( x, y, a ) bli_sssaxpys( x, y, a ) -#define bli_dssdots( x, y, a ) bli_dssaxpys( x, y, a ) -#define bli_cssdots( x, y, a ) bli_cssaxpys( x, y, a ) -#define bli_zssdots( x, y, a ) bli_zssaxpys( x, y, a ) - -#define bli_sdsdots( x, y, a ) bli_sdsaxpys( x, y, a ) -#define bli_ddsdots( x, y, a ) bli_ddsaxpys( x, y, a ) -#define bli_cdsdots( x, y, a ) bli_cdsaxpys( x, y, a ) -#define bli_zdsdots( x, y, a ) bli_zdsaxpys( x, y, a ) - -#define bli_scsdots( x, y, a ) bli_scsaxpys( x, y, a ) -#define bli_dcsdots( x, y, a ) bli_dcsaxpys( x, y, a ) -#define bli_ccsdots( x, y, a ) bli_ccsaxpys( x, y, a ) -#define bli_zcsdots( x, y, a ) bli_zcsaxpys( x, y, a ) - -#define bli_szsdots( x, y, a ) bli_szsaxpys( x, y, a ) -#define bli_dzsdots( x, y, a ) bli_dzsaxpys( x, y, a ) -#define bli_czsdots( x, y, a ) bli_czsaxpys( x, y, a ) -#define bli_zzsdots( x, y, a ) bli_zzsaxpys( x, y, a ) - - - -#define bli_ssddots( x, y, a ) bli_ssdaxpys( x, y, a ) -#define bli_dsddots( x, y, a ) bli_dsdaxpys( x, y, a ) -#define bli_csddots( x, y, a ) bli_csdaxpys( x, y, a ) -#define bli_zsddots( x, y, a ) bli_zsdaxpys( x, y, a ) - -#define bli_sdddots( x, y, a ) bli_sddaxpys( x, y, a ) -#define bli_ddddots( x, y, a ) bli_dddaxpys( x, y, a ) -#define bli_cdddots( x, y, a ) bli_cddaxpys( x, y, a ) -#define bli_zdddots( x, y, a ) bli_zddaxpys( x, y, a ) - -#define bli_scddots( x, y, a ) bli_scdaxpys( x, y, a ) -#define bli_dcddots( x, y, a ) bli_dcdaxpys( x, y, a ) -#define bli_ccddots( x, y, a ) bli_ccdaxpys( x, y, a ) -#define bli_zcddots( x, y, a ) bli_zcdaxpys( x, y, a ) - -#define bli_szddots( x, y, a ) bli_szdaxpys( x, y, a ) -#define bli_dzddots( x, y, a ) bli_dzdaxpys( x, y, a ) -#define bli_czddots( x, y, a ) bli_czdaxpys( x, y, a ) -#define bli_zzddots( x, y, a ) bli_zzdaxpys( x, y, a ) - - - -#define bli_sscdots( x, y, a ) bli_sscaxpys( x, y, a ) -#define bli_dscdots( x, y, a ) bli_dscaxpys( x, y, a ) -#define bli_cscdots( 
x, y, a ) bli_cscaxpys( x, y, a ) -#define bli_zscdots( x, y, a ) bli_zscaxpys( x, y, a ) - -#define bli_sdcdots( x, y, a ) bli_sdcaxpys( x, y, a ) -#define bli_ddcdots( x, y, a ) bli_ddcaxpys( x, y, a ) -#define bli_cdcdots( x, y, a ) bli_cdcaxpys( x, y, a ) -#define bli_zdcdots( x, y, a ) bli_zdcaxpys( x, y, a ) - -#define bli_sccdots( x, y, a ) bli_sccaxpys( x, y, a ) -#define bli_dccdots( x, y, a ) bli_dccaxpys( x, y, a ) -#define bli_cccdots( x, y, a ) bli_cccaxpys( x, y, a ) -#define bli_zccdots( x, y, a ) bli_zccaxpys( x, y, a ) - -#define bli_szcdots( x, y, a ) bli_szcaxpys( x, y, a ) -#define bli_dzcdots( x, y, a ) bli_dzcaxpys( x, y, a ) -#define bli_czcdots( x, y, a ) bli_czcaxpys( x, y, a ) -#define bli_zzcdots( x, y, a ) bli_zzcaxpys( x, y, a ) - - - -#define bli_sszdots( x, y, a ) bli_sszaxpys( x, y, a ) -#define bli_dszdots( x, y, a ) bli_dszaxpys( x, y, a ) -#define bli_cszdots( x, y, a ) bli_cszaxpys( x, y, a ) -#define bli_zszdots( x, y, a ) bli_zszaxpys( x, y, a ) - -#define bli_sdzdots( x, y, a ) bli_sdzaxpys( x, y, a ) -#define bli_ddzdots( x, y, a ) bli_ddzaxpys( x, y, a ) -#define bli_cdzdots( x, y, a ) bli_cdzaxpys( x, y, a ) -#define bli_zdzdots( x, y, a ) bli_zdzaxpys( x, y, a ) - -#define bli_sczdots( x, y, a ) bli_sczaxpys( x, y, a ) -#define bli_dczdots( x, y, a ) bli_dczaxpys( x, y, a ) -#define bli_cczdots( x, y, a ) bli_cczaxpys( x, y, a ) -#define bli_zczdots( x, y, a ) bli_zczaxpys( x, y, a ) - -#define bli_szzdots( x, y, a ) bli_szzaxpys( x, y, a ) -#define bli_dzzdots( x, y, a ) bli_dzzaxpys( x, y, a ) -#define bli_czzdots( x, y, a ) bli_czzaxpys( x, y, a ) -#define bli_zzzdots( x, y, a ) bli_zzzaxpys( x, y, a ) - - - -#define bli_sdots( x, y, a ) bli_sssdots( x, y, a ) -#define bli_ddots( x, y, a ) bli_ddddots( x, y, a ) -#define bli_cdots( x, y, a ) bli_cccdots( x, y, a ) -#define bli_zdots( x, y, a ) bli_zzzdots( x, y, a ) - - -#endif - diff --git a/frame/include/level0/bli_eq.h b/frame/include/level0/bli_eq.h deleted file 
mode 100644 index 691542b089..0000000000 --- a/frame/include/level0/bli_eq.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_EQ_H -#define BLIS_EQ_H - - -// eq (passed by value) - -#define bli_seq( a, b ) ( (a) == (b) ) -#define bli_deq( a, b ) ( (a) == (b) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_ceq( a, b ) ( ( bli_creal(a) == bli_creal(b) ) && ( bli_cimag(a) == bli_cimag(b) ) ) -#define bli_zeq( a, b ) ( ( bli_zreal(a) == bli_zreal(b) ) && ( bli_zimag(a) == bli_zimag(b) ) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_ceq( a, b ) ( (a) == (b) ) -#define bli_zeq( a, b ) ( (a) == (b) ) - -#endif // BLIS_ENABLE_C99_COMPLEX - -#define bli_ieq( a, b ) ( (a) == (b) ) - - - -// eqtori (passed by value) - -#define bli_seqtori( a, br, bi ) ( (a) == (br) ) -#define bli_deqtori( a, br, bi ) ( (a) == (br) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_ceqtori( a, br, bi ) ( ( bli_creal(a) == (br) ) && ( bli_cimag(a) == (bi) ) ) -#define bli_zeqtori( a, br, bi ) ( ( bli_zreal(a) == (br) ) && ( bli_zimag(a) == (bi) ) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_ceqtori( a, br, bi ) ( (a) == (br) + (bi) * (I) ) -#define bli_zeqtori( a, br, bi ) ( (a) == (br) + (bi) * (I) ) - -#endif // BLIS_ENABLE_C99_COMPLEX - - - -// eqa (passed by address) - -#define bli_seqa( a, b ) bli_seq( *(( float* )(a)), *(( float* )(b)) ) -#define bli_deqa( a, b ) bli_deq( *(( double* )(a)), *(( double* )(b)) ) -#define bli_ceqa( a, b ) bli_ceq( *(( scomplex* )(a)), *(( scomplex* )(b)) ) -#define bli_zeqa( a, b ) bli_zeq( *(( dcomplex* )(a)), *(( dcomplex* )(b)) ) -#define bli_ieqa( a, b ) bli_ieq( *(( gint_t* )(a)), *(( gint_t* )(b)) ) - - - -// eq1 - -#define bli_seq1( a ) bli_seqtori( (a), 1.0F, 0.0F ) -#define bli_deq1( a ) bli_deqtori( (a), 1.0, 0.0 ) -#define bli_ceq1( a ) bli_ceqtori( (a), 1.0F, 0.0F ) -#define bli_zeq1( a ) bli_zeqtori( (a), 1.0, 0.0 ) -#define bli_ieq1( a ) bli_ieq ( (a), 1 ) - - - -// eq0 - -#define bli_seq0( a ) bli_seqtori( (a), 0.0F, 0.0F ) -#define bli_deq0( a ) bli_deqtori( (a), 0.0, 0.0 ) -#define bli_ceq0( a ) bli_ceqtori( 
(a), 0.0F, 0.0F ) -#define bli_zeq0( a ) bli_zeqtori( (a), 0.0, 0.0 ) -#define bli_ieq0( a ) bli_ieq ( (a), 0 ) - - - -// eqm1 - -#define bli_seqm1( a ) bli_seqtori( (a), -1.0F, 0.0F ) -#define bli_deqm1( a ) bli_deqtori( (a), -1.0, 0.0 ) -#define bli_ceqm1( a ) bli_ceqtori( (a), -1.0F, 0.0F ) -#define bli_zeqm1( a ) bli_zeqtori( (a), -1.0, 0.0 ) -#define bli_ieqm1( a ) bli_ieq ( (a), -1 ) - - - -#endif diff --git a/frame/include/level0/bli_gets.h b/frame/include/level0/bli_gets.h deleted file mode 100644 index 985ecf4cb3..0000000000 --- a/frame/include/level0/bli_gets.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_GETS_H -#define BLIS_GETS_H - -// gets - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - - -#define bli_ssgets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = bli_simag(x); } -#define bli_dsgets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = bli_dimag(x); } -#define bli_csgets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = bli_cimag(x); } -#define bli_zsgets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = bli_zimag(x); } -#define bli_isgets( x, yr, yi ) { (yr) = ( float )(x); (yi) = 0.0F; } - -#define bli_sdgets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = bli_simag(x); } -#define bli_ddgets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = bli_dimag(x); } -#define bli_cdgets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = bli_cimag(x); } -#define bli_zdgets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = bli_zimag(x); } -#define bli_idgets( x, yr, yi ) { (yr) = ( double )(x); (yi) = 0.0; } - -#define bli_scgets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = bli_simag(x); } -#define bli_dcgets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = bli_dimag(x); } -#define bli_ccgets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = bli_cimag(x); } -#define bli_zcgets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = bli_zimag(x); } -#define bli_icgets( x, yr, yi ) { (yr) = ( float )(x); (yi) = 0.0F; } - -#define bli_szgets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = bli_simag(x); } -#define bli_dzgets( x, yr, yi ) { (yr) = bli_dreal(x); 
(yi) = bli_dimag(x); } -#define bli_czgets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = bli_cimag(x); } -#define bli_zzgets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = bli_zimag(x); } -#define bli_izgets( x, yr, yi ) { (yr) = ( double )(x); (yi) = 0.0; } - -#define bli_sigets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = 0; } -#define bli_digets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = 0; } -#define bli_cigets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = 0; } -#define bli_zigets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = 0; } -#define bli_iigets( x, yr, yi ) { (yr) = (x); (yi) = 0; } - - -#define bli_sgets( x, yr, yi ) bli_ssgets( x, yr, yi ) -#define bli_dgets( x, yr, yi ) bli_ddgets( x, yr, yi ) -#define bli_cgets( x, yr, yi ) bli_csgets( x, yr, yi ) -#define bli_zgets( x, yr, yi ) bli_zdgets( x, yr, yi ) -#define bli_igets( x, yr, yi ) bli_idgets( x, yr, yi ) - - -#endif diff --git a/frame/include/level0/bli_invscaljs.h b/frame/include/level0/bli_invscaljs.h deleted file mode 100644 index 2c26bdc7f9..0000000000 --- a/frame/include/level0/bli_invscaljs.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_INVSCALJS_H -#define BLIS_INVSCALJS_H - -// invscaljs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of y. - -#define bli_ssinvscaljs( a, y ) bli_sinvscaljris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) -#define bli_dsinvscaljs( a, y ) bli_sinvscaljris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_csinvscaljs( a, y ) bli_sinvscaljris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_zsinvscaljs( a, y ) bli_sinvscaljris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) - -#define bli_sdinvscaljs( a, y ) bli_dinvscaljris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_ddinvscaljs( a, y ) bli_dinvscaljris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_cdinvscaljs( a, y ) bli_dinvscaljris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_zdinvscaljs( a, y ) bli_dinvscaljris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scinvscaljs( a, y ) bli_scinvscaljris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) -#define bli_dcinvscaljs( 
a, y ) bli_scinvscaljris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_ccinvscaljs( a, y ) bli_cinvscaljris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_zcinvscaljs( a, y ) bli_cinvscaljris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) - -#define bli_szinvscaljs( a, y ) bli_dzinvscaljris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_dzinvscaljs( a, y ) bli_dzinvscaljris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_czinvscaljs( a, y ) bli_zinvscaljris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_zzinvscaljs( a, y ) bli_zinvscaljris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scinvscaljs( a, y ) { (y) /= (a); } -#define bli_dcinvscaljs( a, y ) { (y) /= (a); } -#define bli_ccinvscaljs( a, y ) { (y) /= conjf(a); } -#define bli_zcinvscaljs( a, y ) { (y) /= conj (a); } - -#define bli_szinvscaljs( a, y ) { (y) /= (a); } -#define bli_dzinvscaljs( a, y ) { (y) /= (a); } -#define bli_czinvscaljs( a, y ) { (y) /= conjf(a); } -#define bli_zzinvscaljs( a, y ) { (y) /= conj (a); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sinvscaljs( a, y ) bli_ssinvscaljs( a, y ) -#define bli_dinvscaljs( a, y ) bli_ddinvscaljs( a, y ) -#define bli_cinvscaljs( a, y ) bli_ccinvscaljs( a, y ) -#define bli_zinvscaljs( a, y ) bli_zzinvscaljs( a, y ) - - -#endif - diff --git a/frame/include/level0/bli_invscals.h b/frame/include/level0/bli_invscals.h deleted file mode 100644 index 558298f0d0..0000000000 --- a/frame/include/level0/bli_invscals.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_INVSCALS_H -#define BLIS_INVSCALS_H - -// invscals - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of y. 
- -#define bli_ssinvscals( a, y ) bli_sinvscalris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) -#define bli_dsinvscals( a, y ) bli_sinvscalris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_csinvscals( a, y ) bli_sinvscalris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_zsinvscals( a, y ) bli_sinvscalris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) - -#define bli_sdinvscals( a, y ) bli_dinvscalris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_ddinvscals( a, y ) bli_dinvscalris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_cdinvscals( a, y ) bli_dinvscalris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_zdinvscals( a, y ) bli_dinvscalris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scinvscals( a, y ) bli_scinvscalris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) -#define bli_dcinvscals( a, y ) bli_scinvscalris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_ccinvscals( a, y ) bli_cinvscalris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_zcinvscals( a, y ) bli_cinvscalris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) - -#define bli_szinvscals( a, y ) bli_dzinvscalris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_dzinvscals( a, y ) bli_dzinvscalris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_czinvscals( a, y ) bli_zinvscalris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_zzinvscals( a, y ) bli_zinvscalris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scinvscals( a, y ) { (y) /= (a); } -#define bli_dcinvscals( a, y ) { (y) /= (a); } -#define bli_ccinvscals( a, y ) { (y) /= (a); } -#define bli_zcinvscals( a, y ) { (y) /= (a); } - -#define 
bli_szinvscals( a, y ) { (y) /= (a); } -#define bli_dzinvscals( a, y ) { (y) /= (a); } -#define bli_czinvscals( a, y ) { (y) /= (a); } -#define bli_zzinvscals( a, y ) { (y) /= (a); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sinvscals( a, y ) bli_ssinvscals( a, y ) -#define bli_dinvscals( a, y ) bli_ddinvscals( a, y ) -#define bli_cinvscals( a, y ) bli_ccinvscals( a, y ) -#define bli_zinvscals( a, y ) bli_zzinvscals( a, y ) - - -#endif - diff --git a/frame/include/level0/bli_lte.h b/frame/include/level0/bli_lte.h deleted file mode 100644 index ab87ff8006..0000000000 --- a/frame/include/level0/bli_lte.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_LTE_H -#define BLIS_LTE_H - - -// lte (passed by value) - -#define bli_slte( a, b ) ( (a) <= (b) ) -#define bli_dlte( a, b ) ( (a) <= (b) ) -#define bli_clte( a, b ) ( bli_creal(a) <= bli_creal(b) ) -#define bli_zlte( a, b ) ( bli_zreal(a) <= bli_zreal(b) ) -#define bli_ilte( a, b ) ( (a) <= (b) ) - -// lte0 - -#define bli_slte0( a ) ( (a) <= 0.0F ) -#define bli_dlte0( a ) ( (a) <= 0.0 ) -#define bli_clte0( a ) ( bli_creal(a) <= 0.0F ) -#define bli_zlte0( a ) ( bli_zreal(a) <= 0.0 ) - -// gte (passed by value) - -#define bli_sgte( a, b ) ( (a) >= (b) ) -#define bli_dgte( a, b ) ( (a) >= (b) ) -#define bli_cgte( a, b ) ( bli_creal(a) >= bli_creal(b) ) -#define bli_zgte( a, b ) ( bli_zreal(a) >= bli_zreal(b) ) -#define bli_igte( a, b ) ( (a) >= (b) ) - -// gte0 - -#define bli_sgte0( a ) ( (a) >= 0.0F ) -#define bli_dgte0( a ) ( (a) >= 0.0 ) -#define bli_cgte0( a ) ( bli_creal(a) >= 0.0F ) -#define bli_zgte0( a ) ( bli_zreal(a) >= 0.0 ) - - - -#endif diff --git a/frame/include/level0/bli_neg2s.h b/frame/include/level0/bli_neg2s.h deleted file mode 100644 index 2f505d50db..0000000000 --- a/frame/include/level0/bli_neg2s.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_NEG2S_H -#define BLIS_NEG2S_H - -// neg2s - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- -#define bli_ssneg2s( x, y ) bli_sneg2ris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dsneg2s( x, y ) bli_sneg2ris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_csneg2s( x, y ) bli_sneg2ris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zsneg2s( x, y ) bli_sneg2ris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdneg2s( x, y ) bli_dneg2ris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ddneg2s( x, y ) bli_dneg2ris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cdneg2s( x, y ) bli_dneg2ris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zdneg2s( x, y ) bli_dneg2ris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scneg2s( x, y ) bli_cneg2ris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dcneg2s( x, y ) bli_cneg2ris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_ccneg2s( x, y ) bli_cneg2ris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zcneg2s( x, y ) bli_cneg2ris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szneg2s( x, y ) bli_zneg2ris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzneg2s( x, y ) bli_zneg2ris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czneg2s( x, y ) bli_zneg2ris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzneg2s( x, y ) bli_zneg2ris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scneg2s( x, y ) { (y) = -(x); } -#define bli_dcneg2s( x, y ) { (y) = -(x); } -#define bli_ccneg2s( x, y ) { (y) = -(x); } -#define bli_zcneg2s( x, y ) { (y) = -(x); } - -#define bli_szneg2s( x, y ) { (y) = -(x); } -#define bli_dzneg2s( x, y ) { (y) = -(x); } -#define bli_czneg2s( x, y ) { (y) = 
-(x); } -#define bli_zzneg2s( x, y ) { (y) = -(x); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sneg2s( x, y ) bli_ssneg2s( x, y ) -#define bli_dneg2s( x, y ) bli_ddneg2s( x, y ) -#define bli_cneg2s( x, y ) bli_ccneg2s( x, y ) -#define bli_zneg2s( x, y ) bli_zzneg2s( x, y ) - - -#endif - diff --git a/frame/include/level0/bli_randnp2s.h b/frame/include/level0/bli_randnp2s.h deleted file mode 100644 index 7904f72aa5..0000000000 --- a/frame/include/level0/bli_randnp2s.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_RANDNP2S_H -#define BLIS_RANDNP2S_H - -// randnp2s - - -#define bli_srandnp2s( a ) \ -{ \ - bli_drandnp2s( a ); \ -} - -#if 0 -#define bli_drandnp2s_prev( a ) \ -{ \ - const double m_max = 3.0; \ - const double m_max2 = m_max + 2.0; \ - double t; \ - double r_val; \ -\ - /* Compute a narrow-range power of two. - - For the purposes of commentary, we'll assume that m_max = 4. This - represents the largest power of two we will use to generate the - random numbers. */ \ -\ - /* Generate a random real number t on the interval: [0.0, 6.0]. */ \ - t = ( ( double ) rand() / ( double ) RAND_MAX ) * m_max2; \ -\ - /* Modify t to guarantee that is never equal to the upper bound of - the interval (in this case, 6.0). */ \ - if ( t == m_max2 ) t = t - 1.0; \ -\ - /* Transform the interval into the set of integers, {0,1,2,3,4,5}. */ \ - t = floor( t ); \ -\ - /* Map values of t == 0 to a final value of 0. */ \ - if ( t == 0.0 ) r_val = 0.0; \ - else \ - { \ - /* This case handles values of t = {1,2,3,4,5}. */ \ -\ - double s_exp, s_val; \ -\ - /* Compute two random numbers to determine the signs of the - exponent and the end result. */ \ - PASTEMAC(d,rands)( s_exp ); \ - PASTEMAC(d,rands)( s_val ); \ -\ - /* Compute r_val = 2^s where s = +/-(t-1) = {-4,-3,-2,-1,0,1,2,3,4}. 
*/ \ - if ( s_exp < 0.0 ) r_val = pow( 2.0, -(t - 1.0) ); \ - else r_val = pow( 2.0, t - 1.0 ); \ -\ - /* If our sign value is negative, our random power of two will - be negative. */ \ - if ( s_val < 0.0 ) r_val = -r_val; \ - } \ -\ - /* Normalize by the largest possible positive value. */ \ - r_val = r_val / pow( 2.0, m_max ); \ -\ - /* r_val = 0, or +/-{2^-4, 2^-3, 2^-2, 2^-1, 2^0, 2^1, 2^2, 2^3, 2^4}. */ \ - /* NOTE: For single-precision macros, this assignment results in typecast - down to float. */ \ - a = r_val; \ -} -#endif - -#define bli_drandnp2s( a ) \ -{ \ - const double m_max = 6.0; \ - const double m_max2 = m_max + 2.0; \ - double t; \ - double r_val; \ -\ - /* Compute a narrow-range power of two. - - For the purposes of commentary, we'll assume that m_max = 4. This - represents the largest power of two we will use to generate the - random numbers. */ \ -\ - do \ - { \ - /* Generate a random real number t on the interval: [0.0, 6.0]. */ \ - t = ( ( double ) rand() / ( double ) RAND_MAX ) * m_max2; \ -\ - /* Transform the interval into the set of integers, {0,1,2,3,4,5}. - Note that 6 is prohibited by the loop guard below. */ \ - t = floor( t ); \ - } \ - /* If t is ever equal to m_max2, we re-randomize. The guard against - m_max2 < t is for sanity and shouldn't happen, unless perhaps there - is weirdness in the typecasting to double when computing t above. */ \ - while ( m_max2 <= t ); \ -\ - /* Map values of t == 0 to a final value of 0. */ \ - if ( t == 0.0 ) r_val = 0.0; \ - else \ - { \ - /* This case handles values of t = {1,2,3,4,5}. */ \ -\ - double s_val; \ -\ - /* Compute r_val = 2^s where s = -(t-1) = {-4,-3,-2,-1,0}. */ \ - r_val = pow( 2.0, -(t - 1.0) ); \ -\ - /* Compute a random number to determine the sign of the final - result. */ \ - PASTEMAC(d,rands)( s_val ); \ -\ - /* If our sign value is negative, our random power of two will - be negative. 
*/ \ - if ( s_val < 0.0 ) r_val = -r_val; \ - } \ -\ - /* r_val = 0, or +/-{2^0, 2^-1, 2^-2, 2^-3, 2^-4}. */ \ - /* NOTE: For single-precision macros, this assignment results in typecast - down to float. */ \ - a = r_val; \ -} -#define bli_crandnp2s( a ) \ -{ \ - float ar, ai; \ -\ - bli_srandnp2s( ar ); \ - bli_srandnp2s( ai ); \ -\ - bli_csets( ar, ai, (a) ); \ -} -#define bli_zrandnp2s( a ) \ -{ \ - double ar, ai; \ -\ - bli_drandnp2s( ar ); \ - bli_drandnp2s( ai ); \ -\ - bli_zsets( ar, ai, (a) ); \ -} - - -#endif - diff --git a/frame/include/level0/bli_rands.h b/frame/include/level0/bli_rands.h deleted file mode 100644 index b377a61532..0000000000 --- a/frame/include/level0/bli_rands.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_RANDS_H -#define BLIS_RANDS_H - -// rands - - -#define bli_srands( a ) \ -{ \ - (a) = ( float ) ( ( double ) rand() / \ - ( ( double ) RAND_MAX / 2.0 ) \ - ) - 1.0F; \ -} -#define bli_drands( a ) \ -{ \ - (a) = ( double ) ( ( double ) rand() / \ - ( ( double ) RAND_MAX / 2.0 ) \ - ) - 1.0; \ -} -#define bli_crands( a ) \ -{ \ - float ar, ai; \ -\ - bli_srands( ar ); \ - bli_srands( ai ); \ -\ - bli_csets( ar, ai, (a) ); \ -} -#define bli_zrands( a ) \ -{ \ - double ar, ai; \ -\ - bli_drands( ar ); \ - bli_drands( ai ); \ -\ - bli_zsets( ar, ai, (a) ); \ -} - - -#endif - diff --git a/frame/include/level0/bli_scal2js.h b/frame/include/level0/bli_scal2js.h deleted file mode 100644 index e8f3acc27d..0000000000 --- a/frame/include/level0/bli_scal2js.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2JS_H -#define BLIS_SCAL2JS_H - -// scal2js - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. 
- - -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dssscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cssscal2js( a, x, y ) bli_rxscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zssscal2js( a, x, y ) bli_rxscal2jris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ddsscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cdsscal2js( a, x, y ) bli_rxscal2jris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zdsscal2js( a, x, y ) bli_rxscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_scsscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dcsscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ccsscal2js( a, x, y ) bli_roscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zcsscal2js( a, x, y ) bli_roscal2jris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_szsscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dzsscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), 
bli_simag(y) ) -#define bli_czsscal2js( a, x, y ) bli_roscal2jris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zzsscal2js( a, x, y ) bli_roscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dsdscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_csdscal2js( a, x, y ) bli_rxscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zsdscal2js( a, x, y ) bli_rxscal2jris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_sddscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dddscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cddscal2js( a, x, y ) bli_rxscal2jris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zddscal2js( a, x, y ) bli_rxscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_scdscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dcdscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ccdscal2js( a, x, y ) bli_roscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zcdscal2js( a, x, y ) bli_roscal2jris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), 
bli_dreal(y), bli_dimag(y) ) - -#define bli_szdscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dzdscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_czdscal2js( a, x, y ) bli_roscal2jris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zzdscal2js( a, x, y ) bli_roscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dscscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cscscal2js( a, x, y ) bli_rcscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zscscal2js( a, x, y ) bli_rcscal2jris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sdcscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_ddcscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cdcscal2js( a, x, y ) bli_rcscal2jris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zdcscal2js( a, x, y ) bli_rcscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sccscal2js( a, x, y ) bli_crscal2jris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccscal2js( a, x, y ) bli_crscal2jris( 
bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cccscal2js( a, x, y ) bli_cxscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccscal2js( a, x, y ) bli_cxscal2jris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szcscal2js( a, x, y ) bli_crscal2jris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dzcscal2js( a, x, y ) bli_crscal2jris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_czcscal2js( a, x, y ) bli_cxscal2jris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zzcscal2js( a, x, y ) bli_cxscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dszscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cszscal2js( a, x, y ) bli_rcscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zszscal2js( a, x, y ) bli_rcscal2jris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sdzscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_ddzscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzscal2js( a, x, y ) bli_rcscal2jris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzscal2js( a, x, y ) 
bli_rcscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sczscal2js( a, x, y ) bli_crscal2jris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dczscal2js( a, x, y ) bli_crscal2jris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cczscal2js( a, x, y ) bli_cxscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zczscal2js( a, x, y ) bli_cxscal2jris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_szzscal2js( a, x, y ) bli_crscal2jris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzscal2js( a, x, y ) bli_crscal2jris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czzscal2js( a, x, y ) bli_cxscal2jris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzscal2js( a, x, y ) bli_cxscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_dscscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_cscscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_zscscal2js( a, x, y ) { (y) = (a) * (x); } - -#define bli_sdcscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_ddcscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_cdcscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_zdcscal2js( a, x, y ) { (y) = (a) * (x); } - -#define bli_sccscal2js( a, x, y ) { (y) = (a) * conjf(x); } -#define bli_dccscal2js( a, x, y ) { (y) = (a) * conjf(x); } -#define bli_cccscal2js( a, x, y ) { (y) = (a) * conjf(x); } -#define bli_zccscal2js( a, x, y ) 
{ (y) = (a) * conjf(x); } - -#define bli_szcscal2js( a, x, y ) { (y) = (a) * conj(x); } -#define bli_dzcscal2js( a, x, y ) { (y) = (a) * conj(x); } -#define bli_czcscal2js( a, x, y ) { (y) = (a) * conj(x); } -#define bli_zzcscal2js( a, x, y ) { (y) = (a) * conj(x); } - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_dszscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_cszscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_zszscal2js( a, x, y ) { (y) = (a) * (x); } - -#define bli_sdzscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_ddzscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_cdzscal2js( a, x, y ) { (y) = (a) * (x); } -#define bli_zdzscal2js( a, x, y ) { (y) = (a) * (x); } - -#define bli_sczscal2js( a, x, y ) { (y) = (a) * conjf(x); } -#define bli_dczscal2js( a, x, y ) { (y) = (a) * conjf(x); } -#define bli_cczscal2js( a, x, y ) { (y) = (a) * conjf(x); } -#define bli_zczscal2js( a, x, y ) { (y) = (a) * conjf(x); } - -#define bli_szzscal2js( a, x, y ) { (y) = (a) * conj(x); } -#define bli_dzzscal2js( a, x, y ) { (y) = (a) * conj(x); } -#define bli_czzscal2js( a, x, y ) { (y) = (a) * conj(x); } -#define bli_zzzscal2js( a, x, y ) { (y) = (a) * conj(x); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sscal2js( a, x, y ) bli_sssscal2js( a, x, y ) -#define bli_dscal2js( a, x, y ) bli_dddscal2js( a, x, y ) -#define bli_cscal2js( a, x, y ) bli_cccscal2js( a, x, y ) -#define bli_zscal2js( a, x, y ) bli_zzzscal2js( a, x, y ) - - -#endif - diff --git a/frame/include/level0/bli_scal2s.h b/frame/include/level0/bli_scal2s.h deleted file mode 100644 index d963595c02..0000000000 --- a/frame/include/level0/bli_scal2s.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2S_H -#define BLIS_SCAL2S_H - -// scal2s - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. 
- -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dssscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cssscal2s( a, x, y ) bli_rxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zssscal2s( a, x, y ) bli_rxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ddsscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_cdsscal2s( a, x, y ) bli_rxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zdsscal2s( a, x, y ) bli_rxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_scsscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dcsscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_ccsscal2s( a, x, y ) bli_roscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zcsscal2s( a, x, y ) bli_roscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_szsscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dzsscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define 
bli_czsscal2s( a, x, y ) bli_roscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zzsscal2s( a, x, y ) bli_roscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dsdscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_csdscal2s( a, x, y ) bli_rxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zsdscal2s( a, x, y ) bli_rxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_sddscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dddscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cddscal2s( a, x, y ) bli_rxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zddscal2s( a, x, y ) bli_rxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_scdscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dcdscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ccdscal2s( a, x, y ) bli_roscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zcdscal2s( a, x, y ) bli_roscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) - -#define bli_szdscal2s( a, 
x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_dzdscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_czdscal2s( a, x, y ) bli_roscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zzdscal2s( a, x, y ) bli_roscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dscscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cscscal2s( a, x, y ) bli_rcscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zscscal2s( a, x, y ) bli_rcscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sdcscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_ddcscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_cdcscal2s( a, x, y ) bli_rcscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zdcscal2s( a, x, y ) bli_rcscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_sccscal2s( a, x, y ) bli_crscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dccscal2s( a, x, y ) bli_crscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define 
bli_cccscal2s( a, x, y ) bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zccscal2s( a, x, y ) bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szcscal2s( a, x, y ) bli_crscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dzcscal2s( a, x, y ) bli_crscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_czcscal2s( a, x, y ) bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zzcscal2s( a, x, y ) bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dszscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cszscal2s( a, x, y ) bli_rcscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zszscal2s( a, x, y ) bli_rcscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sdzscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_ddzscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzscal2s( a, x, y ) bli_rcscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzscal2s( a, x, y ) bli_rcscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_sczscal2s( a, 
x, y ) bli_crscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dczscal2s( a, x, y ) bli_crscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_cczscal2s( a, x, y ) bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zczscal2s( a, x, y ) bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) - -#define bli_szzscal2s( a, x, y ) bli_crscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzscal2s( a, x, y ) bli_crscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czzscal2s( a, x, y ) bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzscal2s( a, x, y ) bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_dscscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_cscscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_zscscal2s( a, x, y ) { (y) = (a) * (x); } - -#define bli_sdcscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_ddcscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_cdcscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_zdcscal2s( a, x, y ) { (y) = (a) * (x); } - -#define bli_sccscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_dccscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_cccscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_zccscal2s( a, x, y ) { (y) = (a) * (x); } - -#define bli_szcscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_dzcscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_czcscal2s( a, x, y ) { (y) 
= (a) * (x); } -#define bli_zzcscal2s( a, x, y ) { (y) = (a) * (x); } - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_dszscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_cszscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_zszscal2s( a, x, y ) { (y) = (a) * (x); } - -#define bli_sdzscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_ddzscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_cdzscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_zdzscal2s( a, x, y ) { (y) = (a) * (x); } - -#define bli_sczscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_dczscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_cczscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_zczscal2s( a, x, y ) { (y) = (a) * (x); } - -#define bli_szzscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_dzzscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_czzscal2s( a, x, y ) { (y) = (a) * (x); } -#define bli_zzzscal2s( a, x, y ) { (y) = (a) * (x); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sscal2s( a, x, y ) bli_sssscal2s( a, x, y ) -#define bli_dscal2s( a, x, y ) bli_dddscal2s( a, x, y ) -#define bli_cscal2s( a, x, y ) bli_cccscal2s( a, x, y ) -#define bli_zscal2s( a, x, y ) bli_zzzscal2s( a, x, y ) - - -#endif - diff --git a/frame/include/level0/bli_scal2s_mxn.h b/frame/include/level0/bli_scal2s_mxn.h deleted file mode 100644 index fdfea4dd99..0000000000 --- a/frame/include/level0/bli_scal2s_mxn.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2S_MXN_H -#define BLIS_SCAL2S_MXN_H - -// scal2s_mxn - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. -// - We only implement cases where typeof(a) == type(x). 
- -#undef BLIS_ENABLE_CR_CASES -#define BLIS_ENABLE_CR_CASES 0 - -// -- bli_???scal2s_mxn -- - -#undef GENTFUNC2 -#define GENTFUNC2( ctypex, ctypey, chx, chy, opname, kername ) \ -\ -BLIS_INLINE void PASTEMAC(chx,chx,chy,opname) \ - ( \ - const conj_t conjx, \ - const dim_t m, \ - const dim_t n, \ - const ctypex* alpha, \ - const ctypex* x, inc_t rs_x, inc_t cs_x, \ - ctypey* y, inc_t rs_y, inc_t cs_y \ - ) \ -{ \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( dim_t j = 0; j < n; ++j ) \ - { \ - const ctypex* restrict xj = x + j*cs_x; \ - ctypey* restrict yj = y + j*cs_y; \ -\ - for ( dim_t i = 0; i < m; ++i ) \ - { \ - const ctypex* restrict xij = xj + i*rs_x; \ - ctypey* restrict yij = yj + i*rs_y; \ -\ - PASTEMAC(chx,chx,chy,scal2js)( *alpha, *xij, *yij ); \ - } \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( dim_t j = 0; j < n; ++j ) \ - { \ - const ctypex* restrict xj = x + j*cs_x; \ - ctypey* restrict yj = y + j*cs_y; \ -\ - for ( dim_t i = 0; i < m; ++i ) \ - { \ - const ctypex* restrict xij = xj + i*rs_x; \ - ctypey* restrict yij = yj + i*rs_y; \ -\ - PASTEMAC(chx,chx,chy,scal2s)( *alpha, *xij, *yij ); \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC2_BASIC ( scal2s_mxn, scal2s ) -INSERT_GENTFUNC2_MIX_DP( scal2s_mxn, scal2s ) - - -// -- bli_?scal2s_mxn -- - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname ) \ -\ -BLIS_INLINE void PASTEMAC(ch,opname) \ - ( \ - const conj_t conjx, \ - const dim_t m, \ - const dim_t n, \ - const ctype* alpha, \ - const ctype* x, inc_t rs_x, inc_t cs_x, \ - ctype* y, inc_t rs_y, inc_t cs_y \ - ) \ -{ \ - PASTEMAC(ch,ch,ch,opname)( conjx, m, n, alpha, x, rs_x, cs_x, y, rs_y, cs_y ); \ -} - -INSERT_GENTFUNC_BASIC( scal2s_mxn ) - -#endif diff --git a/frame/include/level0/bli_scalcjs.h b/frame/include/level0/bli_scalcjs.h deleted file mode 100644 index 5619415cdd..0000000000 --- a/frame/include/level0/bli_scalcjs.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing 
high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCALCJS_H -#define BLIS_SCALCJS_H - -// scalcjs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- -#define bli_ssscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) -#define bli_dsscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_csscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) -#define bli_zsscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) - -#define bli_sdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_ddscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_cdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) -#define bli_zdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scscalcjs( conjx, x, y ) bli_scscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) -#define bli_dcscalcjs( conjx, x, y ) bli_scscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_ccscalcjs( conjx, x, y ) bli_cscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) -#define bli_zcscalcjs( conjx, x, y ) bli_cscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) - -#define bli_szscalcjs( conjx, x, y ) bli_dzscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_dzscalcjs( conjx, x, y ) bli_dzscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_czscalcjs( conjx, x, y ) bli_zscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) -#define bli_zzscalcjs( conjx, x, y ) bli_zscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define 
bli_scscalcjs( conjx, x, y ) { (y) *= (x); } -#define bli_dcscalcjs( conjx, x, y ) { (y) *= (x); } -#define bli_ccscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conjf(x) : (x) ); } -#define bli_zcscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conj (x) : (x) ); } - -#define bli_szscalcjs( conjx, x, y ) { (y) *= (x); } -#define bli_dzscalcjs( conjx, x, y ) { (y) *= (x); } -#define bli_czscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conjf(x) : (x) ); } -#define bli_zzscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conj (x) : (x) ); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sscalcjs( conjx, x, y ) bli_ssscalcjs( conjx, x, y ) -#define bli_dscalcjs( conjx, x, y ) bli_ddscalcjs( conjx, x, y ) -#define bli_cscalcjs( conjx, x, y ) bli_ccscalcjs( conjx, x, y ) -#define bli_zscalcjs( conjx, x, y ) bli_zzscalcjs( conjx, x, y ) - - -#endif - diff --git a/frame/include/level0/bli_scaljs.h b/frame/include/level0/bli_scaljs.h deleted file mode 100644 index 8fb2d2922d..0000000000 --- a/frame/include/level0/bli_scaljs.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCALJS_H -#define BLIS_SCALJS_H - -// scaljs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of y. - -#define bli_ssscaljs( a, y ) bli_sscaljris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) -#define bli_dsscaljs( a, y ) bli_sscaljris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_csscaljs( a, y ) bli_sscaljris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_zsscaljs( a, y ) bli_sscaljris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) - -#define bli_sdscaljs( a, y ) bli_dscaljris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_ddscaljs( a, y ) bli_dscaljris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_cdscaljs( a, y ) bli_dscaljris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_zdscaljs( a, y ) bli_dscaljris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scscaljs( a, y ) bli_scscaljris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) -#define bli_dcscaljs( a, y ) bli_scscaljris( bli_dreal(a), bli_dimag(a), bli_creal(y), 
bli_cimag(y) ) -#define bli_ccscaljs( a, y ) bli_cscaljris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_zcscaljs( a, y ) bli_cscaljris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) - -#define bli_szscaljs( a, y ) bli_dzscaljris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_dzscaljs( a, y ) bli_dzscaljris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_czscaljs( a, y ) bli_zscaljris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_zzscaljs( a, y ) bli_zscaljris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scscaljs( a, y ) { (y) *= (a); } -#define bli_dcscaljs( a, y ) { (y) *= (a); } -#define bli_ccscaljs( a, y ) { (y) *= conjf(a); } -#define bli_zcscaljs( a, y ) { (y) *= conj (a); } - -#define bli_szscaljs( a, y ) { (y) *= (a); } -#define bli_dzscaljs( a, y ) { (y) *= (a); } -#define bli_czscaljs( a, y ) { (y) *= conjf(a); } -#define bli_zzscaljs( a, y ) { (y) *= conj (a); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sscaljs( a, y ) bli_ssscaljs( a, y ) -#define bli_dscaljs( a, y ) bli_ddscaljs( a, y ) -#define bli_cscaljs( a, y ) bli_ccscaljs( a, y ) -#define bli_zscaljs( a, y ) bli_zzscaljs( a, y ) - - -#endif - diff --git a/frame/include/level0/bli_scals.h b/frame/include/level0/bli_scals.h deleted file mode 100644 index fc45c5e760..0000000000 --- a/frame/include/level0/bli_scals.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCALS_H -#define BLIS_SCALS_H - -// scals - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of y. 
- -#define bli_ssscals( a, y ) bli_sscalris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) -#define bli_dsscals( a, y ) bli_sscalris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_csscals( a, y ) bli_sscalris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_zsscals( a, y ) bli_sscalris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) - -#define bli_sdscals( a, y ) bli_dscalris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_ddscals( a, y ) bli_dscalris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_cdscals( a, y ) bli_dscalris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_zdscals( a, y ) bli_dscalris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scscals( a, y ) bli_scscalris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) -#define bli_dcscals( a, y ) bli_scscalris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_ccscals( a, y ) bli_cscalris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_zcscals( a, y ) bli_cscalris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) - -#define bli_szscals( a, y ) bli_dzscalris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_dzscals( a, y ) bli_dzscalris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_czscals( a, y ) bli_zscalris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_zzscals( a, y ) bli_zscalris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scscals( a, y ) { (y) *= (a); } -#define bli_dcscals( a, y ) { (y) *= (a); } -#define bli_ccscals( a, y ) { (y) *= (a); } -#define bli_zcscals( a, y ) { (y) *= (a); } - -#define bli_szscals( a, y ) { (y) *= (a); } -#define bli_dzscals( a, y ) { (y) *= (a); } -#define bli_czscals( a, y ) { (y) 
*= (a); } -#define bli_zzscals( a, y ) { (y) *= (a); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sscals( a, y ) bli_ssscals( a, y ) -#define bli_dscals( a, y ) bli_ddscals( a, y ) -#define bli_cscals( a, y ) bli_ccscals( a, y ) -#define bli_zscals( a, y ) bli_zzscals( a, y ) - - -#endif - diff --git a/frame/include/level0/bli_set0s.h b/frame/include/level0/bli_set0s.h deleted file mode 100644 index 92aab787d4..0000000000 --- a/frame/include/level0/bli_set0s.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET0S_H -#define BLIS_SET0S_H - -#define bli_sset0s( a ) bli_ssets( 0.0F, 0.0F, (a) ) -#define bli_dset0s( a ) bli_dsets( 0.0 , 0.0 , (a) ) -#define bli_cset0s( a ) bli_csets( 0.0F, 0.0F, (a) ) -#define bli_zset0s( a ) bli_zsets( 0.0 , 0.0 , (a) ) - -#endif - diff --git a/frame/include/level0/bli_set0s_edge.h b/frame/include/level0/bli_set0s_edge.h deleted file mode 100644 index ca57685fc5..0000000000 --- a/frame/include/level0/bli_set0s_edge.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET0S_EDGE_H -#define BLIS_SET0S_EDGE_H - -// set0s_mxn - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - -#define GENTFUNC(ctype,ch,op) \ -\ -BLIS_INLINE void PASTEMAC(ch,op) \ - ( \ - const dim_t i, \ - const dim_t m, \ - const dim_t j, \ - const dim_t n, \ - ctype* restrict p, \ - const inc_t ldp \ - ) \ -{ \ - if ( i < m ) \ - { \ - PASTEMAC(ch,set0s_mxn) \ - ( \ - m - i, \ - j, \ - p + i*1, 1, ldp \ - ); \ - } \ -\ - if ( j < n ) \ - { \ - PASTEMAC(ch,set0s_mxn) \ - ( \ - m, \ - n - j, \ - p + j*ldp, 1, ldp \ - ); \ - } \ -} - -INSERT_GENTFUNC_BASIC(set0s_edge) - -#endif diff --git a/frame/include/level0/bli_set0s_mxn.h b/frame/include/level0/bli_set0s_mxn.h deleted file mode 100644 index ed2f9b159f..0000000000 --- a/frame/include/level0/bli_set0s_mxn.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET0S_MXN_H -#define BLIS_SET0S_MXN_H - -// set0s_mxn - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- -BLIS_INLINE void bli_sset0s_mxn( const dim_t m, const dim_t n, - float* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - bli_sset0s( *(y + i*rs_y + j*cs_y) ); -} - -BLIS_INLINE void bli_dset0s_mxn( const dim_t m, const dim_t n, - double* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - bli_dset0s( *(y + i*rs_y + j*cs_y) ); -} - -BLIS_INLINE void bli_cset0s_mxn( const dim_t m, const dim_t n, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - bli_cset0s( *(y + i*rs_y + j*cs_y) ); -} - -BLIS_INLINE void bli_zset0s_mxn( const dim_t m, const dim_t n, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) -{ - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - bli_zset0s( *(y + i*rs_y + j*cs_y) ); -} - -#endif diff --git a/frame/include/level0/bli_set1s.h b/frame/include/level0/bli_set1s.h deleted file mode 100644 index 98a58bc10d..0000000000 --- a/frame/include/level0/bli_set1s.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET1S_H -#define BLIS_SET1S_H - -#define bli_sset1s( a ) bli_ssets( 1.0F, 0.0F, (a) ) -#define bli_dset1s( a ) bli_dsets( 1.0 , 0.0 , (a) ) -#define bli_cset1s( a ) bli_csets( 1.0F, 0.0F, (a) ) -#define bli_zset1s( a ) bli_zsets( 1.0 , 0.0 , (a) ) - -#endif - diff --git a/frame/include/level0/bli_seti0s.h b/frame/include/level0/bli_seti0s.h deleted file mode 100644 index 229d6b474e..0000000000 --- a/frame/include/level0/bli_seti0s.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SETI0S_H -#define BLIS_SETI0S_H - -#define bli_sseti0s( a ) bli_ssetis( 0.0F, (a) ) -#define bli_dseti0s( a ) bli_dsetis( 0.0 , (a) ) -#define bli_cseti0s( a ) bli_csetis( 0.0F, (a) ) -#define bli_zseti0s( a ) bli_zsetis( 0.0 , (a) ) - -#endif - diff --git a/frame/include/level0/bli_setis.h b/frame/include/level0/bli_setis.h deleted file mode 100644 index 4f508e4931..0000000000 --- a/frame/include/level0/bli_setis.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SETIS_H -#define BLIS_SETIS_H - -// setis - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- -#define bli_sssetis( xi, y ) { ; } -#define bli_dssetis( xi, y ) { ; } - -#define bli_sdsetis( xi, y ) { ; } -#define bli_ddsetis( xi, y ) { ; } - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsetis( xi, y ) { bli_cimag(y) = (xi); } -#define bli_dcsetis( xi, y ) { bli_cimag(y) = (xi); } - -#define bli_szsetis( xi, y ) { bli_zimag(y) = (xi); } -#define bli_dzsetis( xi, y ) { bli_zimag(y) = (xi); } - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsetis( xi, y ) { (y) = bli_creal(y) + (xi) * (I); } -#define bli_dcsetis( xi, y ) { (y) = bli_creal(y) + (xi) * (I); } - -#define bli_szsetis( xi, y ) { (y) = bli_zreal(y) + (xi) * (I); } -#define bli_dzsetis( xi, y ) { (y) = bli_zreal(y) + (xi) * (I); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_ssetis( xi, y ) bli_sssetis( xi, y ) -#define bli_dsetis( xi, y ) bli_ddsetis( xi, y ) -#define bli_csetis( xi, y ) bli_scsetis( xi, y ) -#define bli_zsetis( xi, y ) bli_dzsetis( xi, y ) - - -#endif - diff --git a/frame/include/level0/bli_sets.h b/frame/include/level0/bli_sets.h deleted file mode 100644 index 758fc29d6d..0000000000 --- a/frame/include/level0/bli_sets.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SETS_H -#define BLIS_SETS_H - -// sets - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- -#define bli_sssets( xr, xi, y ) { (y) = (xr); } -#define bli_dssets( xr, xi, y ) { (y) = (xr); } -#define bli_cssets( xr, xi, y ) { (y) = (xr); } -#define bli_zssets( xr, xi, y ) { (y) = (xr); } -#define bli_issets( xr, xi, y ) { (y) = (xr); } - -#define bli_sdsets( xr, xi, y ) { (y) = (xr); } -#define bli_ddsets( xr, xi, y ) { (y) = (xr); } -#define bli_cdsets( xr, xi, y ) { (y) = (xr); } -#define bli_zdsets( xr, xi, y ) { (y) = (xr); } -#define bli_idsets( xr, xi, y ) { (y) = (xr); } - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } -#define bli_dcsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } -#define bli_ccsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } -#define bli_zcsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } -#define bli_icsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } - -#define bli_szsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } -#define bli_dzsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } -#define bli_czsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } -#define bli_zzsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } -#define bli_izsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } -#define bli_dcsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } -#define bli_ccsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } -#define bli_zcsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } - -#define bli_szsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } -#define bli_dzsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } -#define bli_czsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } -#define bli_zzsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } - -#endif // BLIS_ENABLE_C99_COMPLEX - -#define bli_sisets( xr, xi, y ) { (y) = bli_sreal(xr); } -#define bli_disets( xr, xi, y ) { 
(y) = bli_dreal(xr); } -#define bli_cisets( xr, xi, y ) { (y) = bli_creal(xr); } -#define bli_zisets( xr, xi, y ) { (y) = bli_zreal(xr); } -#define bli_iisets( xr, xi, y ) { (y) = (xr); } - - -#define bli_ssets( xr, xi, y ) bli_sssets( xr, xi, y ) -#define bli_dsets( xr, xi, y ) bli_ddsets( xr, xi, y ) -#define bli_csets( xr, xi, y ) bli_scsets( xr, xi, y ) -#define bli_zsets( xr, xi, y ) bli_dzsets( xr, xi, y ) -#define bli_isets( xr, xi, y ) bli_disets( xr, xi, y ) - - -#endif - diff --git a/frame/include/level0/bli_sqrt2s.h b/frame/include/level0/bli_sqrt2s.h deleted file mode 100644 index 66ae9fe184..0000000000 --- a/frame/include/level0/bli_sqrt2s.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SQRT2S_H -#define BLIS_SQRT2S_H - -// sqrt2s - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of a. - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_sssqrt2s( x, a ) bli_ssqrt2ris( bli_sreal(x), bli_simag(x), bli_sreal(a), bli_simag(a) ) -#define bli_dssqrt2s( x, a ) bli_ssqrt2ris( bli_dreal(x), bli_dimag(x), bli_sreal(a), bli_simag(a) ) -#define bli_cssqrt2s( x, a ) bli_ssqrt2ris( bli_creal(x), bli_cimag(x), bli_sreal(a), bli_simag(a) ) -#define bli_zssqrt2s( x, a ) bli_ssqrt2ris( bli_zreal(x), bli_zimag(x), bli_sreal(a), bli_simag(a) ) - -#define bli_sdsqrt2s( x, a ) bli_dsqrt2ris( bli_sreal(x), bli_simag(x), bli_dreal(a), bli_dimag(a) ) -#define bli_ddsqrt2s( x, a ) bli_dsqrt2ris( bli_dreal(x), bli_dimag(x), bli_dreal(a), bli_dimag(a) ) -#define bli_cdsqrt2s( x, a ) bli_dsqrt2ris( bli_creal(x), bli_cimag(x), bli_dreal(a), bli_dimag(a) ) -#define bli_zdsqrt2s( x, a ) bli_dsqrt2ris( bli_zreal(x), bli_zimag(x), bli_dreal(a), bli_dimag(a) ) - -#define bli_scsqrt2s( x, a ) bli_scsqrt2ris( bli_sreal(x), bli_simag(x), bli_creal(a), bli_cimag(a) ) -#define bli_dcsqrt2s( x, a ) bli_scsqrt2ris( bli_dreal(x), bli_dimag(x), bli_creal(a), bli_cimag(a) ) -#define bli_ccsqrt2s( x, a ) bli_csqrt2ris( bli_creal(x), bli_cimag(x), bli_creal(a), bli_cimag(a) ) -#define bli_zcsqrt2s( x, a ) bli_csqrt2ris( bli_zreal(x), bli_zimag(x), bli_creal(a), bli_cimag(a) ) - -#define bli_szsqrt2s( x, a ) 
bli_dzsqrt2ris( bli_sreal(x), bli_simag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_dzsqrt2s( x, a ) bli_dzsqrt2ris( bli_dreal(x), bli_dimag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_czsqrt2s( x, a ) bli_zsqrt2ris( bli_creal(x), bli_cimag(x), bli_zreal(a), bli_zimag(a) ) -#define bli_zzsqrt2s( x, a ) bli_zsqrt2ris( bli_zreal(x), bli_zimag(x), bli_zreal(a), bli_zimag(a) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_sssqrt2s( x, a ) { (a) = ( float ) sqrtf( (x) ) ; } -#define bli_dssqrt2s( x, a ) { (a) = ( float ) sqrt ( (x) ) ; } -#define bli_cssqrt2s( x, a ) { (a) = ( float )bli_creal( csqrtf( (x) ) ); } -#define bli_zssqrt2s( x, a ) { (a) = ( float )bli_zreal( csqrt ( (x) ) ); } - -#define bli_sdsqrt2s( x, a ) { (a) = ( double ) sqrtf( (x) ) ; } -#define bli_ddsqrt2s( x, a ) { (a) = ( double ) sqrt ( (x) ) ; } -#define bli_cdsqrt2s( x, a ) { (a) = ( double )bli_creal( csqrtf( (x) ) ); } -#define bli_zdsqrt2s( x, a ) { (a) = ( double )bli_zreal( csqrt ( (x) ) ); } - -#define bli_scsqrt2s( x, a ) { (a) = ( scomplex ) sqrtf( (x) ) ; } -#define bli_dcsqrt2s( x, a ) { (a) = ( scomplex ) sqrt ( (x) ) ; } -#define bli_ccsqrt2s( x, a ) { (a) = ( scomplex ) csqrtf( (x) ) ; } -#define bli_zcsqrt2s( x, a ) { (a) = ( scomplex ) csqrt ( (x) ) ; } - -#define bli_szsqrt2s( x, a ) { (a) = ( dcomplex ) sqrtf( (x) ) ; } -#define bli_dzsqrt2s( x, a ) { (a) = ( dcomplex ) sqrt ( (x) ) ; } -#define bli_czsqrt2s( x, a ) { (a) = ( dcomplex ) csqrtf( (x) ) ; } -#define bli_zzsqrt2s( x, a ) { (a) = ( dcomplex ) csqrt ( (x) ) ; } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_ssqrt2s( x, a ) bli_sssqrt2s( x, a ) -#define bli_dsqrt2s( x, a ) bli_ddsqrt2s( x, a ) -#define bli_csqrt2s( x, a ) bli_ccsqrt2s( x, a ) -#define bli_zsqrt2s( x, a ) bli_zzsqrt2s( x, a ) - - -#endif diff --git a/frame/include/level0/bli_subjs.h b/frame/include/level0/bli_subjs.h deleted file mode 100644 index f453fa21fd..0000000000 --- a/frame/include/level0/bli_subjs.h +++ /dev/null @@ -1,88 
+0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SUBJS_H -#define BLIS_SUBJS_H - -// subjs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of y. 
- -#define bli_sssubjs( a, y ) bli_ssubjris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) -#define bli_dssubjs( a, y ) bli_ssubjris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_cssubjs( a, y ) bli_ssubjris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_zssubjs( a, y ) bli_ssubjris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsubjs( a, y ) bli_dsubjris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_ddsubjs( a, y ) bli_dsubjris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_cdsubjs( a, y ) bli_dsubjris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_zdsubjs( a, y ) bli_dsubjris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsubjs( a, y ) bli_csubjris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) -#define bli_dcsubjs( a, y ) bli_csubjris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_ccsubjs( a, y ) bli_csubjris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_zcsubjs( a, y ) bli_csubjris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) - -#define bli_szsubjs( a, y ) bli_zsubjris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_dzsubjs( a, y ) bli_zsubjris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_czsubjs( a, y ) bli_zsubjris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_zzsubjs( a, y ) bli_zsubjris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsubjs( a, y ) { (y) -= (a); } -#define bli_dcsubjs( a, y ) { (y) -= (a); } -#define bli_ccsubjs( a, y ) { (y) -= conjf(a); } -#define bli_zcsubjs( a, y ) { (y) -= conj (a); } - -#define bli_szsubjs( a, y ) { (y) -= (a); } -#define bli_dzsubjs( a, y ) { (y) -= (a); } -#define bli_czsubjs( a, y ) { 
(y) -= conjf(a); } -#define bli_zzsubjs( a, y ) { (y) -= conj (a); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_ssubjs( a, y ) bli_sssubjs( a, y ) -#define bli_dsubjs( a, y ) bli_ddsubjs( a, y ) -#define bli_csubjs( a, y ) bli_ccsubjs( a, y ) -#define bli_zsubjs( a, y ) bli_zzsubjs( a, y ) - - -#endif - diff --git a/frame/include/level0/bli_subs.h b/frame/include/level0/bli_subs.h deleted file mode 100644 index 2c9a79dab8..0000000000 --- a/frame/include/level0/bli_subs.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SUBS_H -#define BLIS_SUBS_H - -// subs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of y. - -#define bli_sssubs( a, y ) bli_ssubris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) -#define bli_dssubs( a, y ) bli_ssubris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_cssubs( a, y ) bli_ssubris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) -#define bli_zssubs( a, y ) bli_ssubris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsubs( a, y ) bli_dsubris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_ddsubs( a, y ) bli_dsubris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_cdsubs( a, y ) bli_dsubris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) -#define bli_zdsubs( a, y ) bli_dsubris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsubs( a, y ) bli_csubris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) -#define bli_dcsubs( a, y ) bli_csubris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_ccsubs( a, y ) bli_csubris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) -#define bli_zcsubs( a, y ) bli_csubris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) - -#define bli_szsubs( a, y ) bli_zsubris( bli_sreal(a), bli_simag(a), bli_zreal(y), 
bli_zimag(y) ) -#define bli_dzsubs( a, y ) bli_zsubris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_czsubs( a, y ) bli_zsubris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) -#define bli_zzsubs( a, y ) bli_zsubris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsubs( a, y ) { (y) -= (a); } -#define bli_dcsubs( a, y ) { (y) -= (a); } -#define bli_ccsubs( a, y ) { (y) -= (a); } -#define bli_zcsubs( a, y ) { (y) -= (a); } - -#define bli_szsubs( a, y ) { (y) -= (a); } -#define bli_dzsubs( a, y ) { (y) -= (a); } -#define bli_czsubs( a, y ) { (y) -= (a); } -#define bli_zzsubs( a, y ) { (y) -= (a); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_ssubs( a, y ) bli_sssubs( a, y ) -#define bli_dsubs( a, y ) bli_ddsubs( a, y ) -#define bli_csubs( a, y ) bli_ccsubs( a, y ) -#define bli_zsubs( a, y ) bli_zzsubs( a, y ) - - -#endif - diff --git a/frame/include/level0/bli_swaps.h b/frame/include/level0/bli_swaps.h deleted file mode 100644 index fe18d94fd8..0000000000 --- a/frame/include/level0/bli_swaps.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SWAPS_H -#define BLIS_SWAPS_H - -// swaps - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- - -#define bli_ssswaps( x, y ) \ -{ \ - float w; \ - bli_sscopys( (y), (w) ); \ - bli_sscopys( (x), (y) ); \ - bli_sscopys( (w), (x) ); \ -} -#define bli_dsswaps( x, y ) \ -{ \ - double w; \ - bli_sdcopys( (y), (w) ); \ - bli_dscopys( (x), (y) ); \ - bli_ddcopys( (w), (x) ); \ -} -#define bli_csswaps( x, y ) \ -{ \ - scomplex w; \ - bli_sccopys( (y), (w) ); \ - bli_cscopys( (x), (y) ); \ - bli_cccopys( (w), (x) ); \ -} -#define bli_zsswaps( x, y ) \ -{ \ - dcomplex w; \ - bli_szcopys( (y), (w) ); \ - bli_zscopys( (x), (y) ); \ - bli_zzcopys( (w), (x) ); \ -} - - -#define bli_sdswaps( x, y ) \ -{ \ - float w; \ - bli_dscopys( (y), (w) ); \ - bli_sdcopys( (x), (y) ); \ - bli_sscopys( (w), (x) ); \ -} -#define bli_ddswaps( x, y ) \ -{ \ - double w; \ - bli_ddcopys( (y), (w) ); \ - bli_ddcopys( (x), (y) ); \ - bli_ddcopys( (w), (x) ); \ -} -#define bli_cdswaps( x, y ) \ -{ \ - scomplex w; \ - bli_dccopys( (y), (w) ); \ - bli_cdcopys( (x), (y) ); \ - bli_cccopys( (w), (x) ); \ -} -#define bli_zdswaps( x, y ) \ -{ \ - dcomplex w; \ - bli_dzcopys( (y), (w) ); \ - bli_zdcopys( (x), (y) ); \ - bli_zzcopys( (w), (x) ); \ -} - - -#define bli_scswaps( x, y ) \ -{ \ - float w; \ - bli_cscopys( (y), (w) ); \ - bli_sccopys( (x), (y) ); \ - bli_sscopys( (w), (x) ); \ -} -#define bli_dcswaps( x, y ) \ -{ \ - double w; \ - bli_cdcopys( (y), (w) ); \ - bli_dccopys( (x), (y) ); \ - bli_ddcopys( (w), (x) ); \ -} -#define bli_ccswaps( x, y ) \ -{ \ - scomplex w; \ - bli_cccopys( (y), (w) ); \ - bli_cccopys( (x), (y) ); \ - bli_cccopys( (w), (x) ); \ -} -#define bli_zcswaps( x, y ) \ -{ \ - dcomplex w; \ - bli_czcopys( (y), (w) ); \ - bli_zccopys( (x), (y) ); \ - bli_zzcopys( (w), (x) ); \ -} - - -#define bli_szswaps( x, y ) \ -{ \ - float w; \ - bli_zscopys( (y), (w) ); \ - bli_szcopys( (x), (y) ); \ - bli_sscopys( (w), (x) ); \ -} -#define bli_dzswaps( x, y ) \ -{ \ - double w; \ - bli_zdcopys( (y), (w) ); \ - bli_dzcopys( (x), (y) ); \ - bli_ddcopys( (w), (x) ); \ -} -#define 
bli_czswaps( x, y ) \ -{ \ - scomplex w; \ - bli_zccopys( (y), (w) ); \ - bli_czcopys( (x), (y) ); \ - bli_cccopys( (w), (x) ); \ -} -#define bli_zzswaps( x, y ) \ -{ \ - dcomplex w; \ - bli_zzcopys( (y), (w) ); \ - bli_zzcopys( (x), (y) ); \ - bli_zzcopys( (w), (x) ); \ -} - - -#define bli_sswaps( x, y ) bli_ssswaps( x, y ) -#define bli_dswaps( x, y ) bli_ddswaps( x, y ) -#define bli_cswaps( x, y ) bli_ccswaps( x, y ) -#define bli_zswaps( x, y ) bli_zzswaps( x, y ) - - -#endif diff --git a/frame/include/level0/bli_tabsq2s.h b/frame/include/level0/bli_tabsq2s.h new file mode 100644 index 0000000000..72b5f83826 --- /dev/null +++ b/frame/include/level0/bli_tabsq2s.h @@ -0,0 +1,135 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef BLIS_TABSQ2S_H
+#define BLIS_TABSQ2S_H
+
+// -- Implementation macro -----------------------------------------------------
+
+// (yr) = (xr) * (xr) + (xi) * (xi);   (both products are formed after the px->chc tcast)
+// (yi) = 0;                           (PASTEMAC(py,zero) is passed as the second value)
+
+#define bli_tabsq2ims( \
+    \
+    dx, px, xr, xi, \
+    dy, py, yr, yi, \
+    chc \
+  ) \
+{ \
+    PASTEMAC(dy,assigns) \
+    ( \
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(dx,dx,termrr)( \
+            chc, \
+            PASTEMAC(chc,mul)( \
+              PASTEMAC(px,chc,tcast)(xr), \
+              PASTEMAC(px,chc,tcast)(xr) \
+            ) \
+          ), \
+          PASTEMAC(dx,dx,termii)( \
+            chc, \
+            PASTEMAC(chc,mul)( \
+              PASTEMAC(px,chc,tcast)(xi), \
+              PASTEMAC(px,chc,tcast)(xi) \
+            ) \
+          ) \
+        ) \
+      ),\
+      PASTEMAC(py,zero), \
+      yr, \
+      yi \
+    ); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// tabsq2s: forwards the dom/prec/real/imag of x and y, plus prec of chc, to bli_tabsq2ims.
+#define bli_tabsq2s( chx, chy, chc, x, y ) \
+    bli_tabsq2ims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(chx,imag)(x), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Exposed real/imaginary --
+
+// tabsq2ris: same as tabsq2s, but the caller passes xr, xi, yr, yi directly.
+#define bli_tabsq2ris( chx, chy, chc, xr, xi, yr, yi ) \
+    bli_tabsq2ims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      xr, \
+      xi, \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Higher-level static functions
-------------------------------------------- + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r +// (yr) := (xr) * (xr); +// (yi) xx 0 ; + +// r c +// (yr) := (xr) * (xr) + (xi) * (xi); +// (yi) xx 0 ; + +// c r +// (yr) := (xr) * (xr); +// (yi) := 0 ; + +// c c +// (yr) := (xr) * (xr) + (xi) * (xi); +// (yi) := 0 ; + +#endif + diff --git a/frame/include/level0/bli_tabval2s.h b/frame/include/level0/bli_tabval2s.h new file mode 100644 index 0000000000..abcd66ba76 --- /dev/null +++ b/frame/include/level0/bli_tabval2s.h @@ -0,0 +1,206 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef BLIS_TABVAL2S_H
+#define BLIS_TABVAL2S_H
+
+// -- Implementation macro -----------------------------------------------------
+
+#define bli_tabval2ims( \
+    \
+    dx, px, xr, xi, \
+    dy, py, yr, yi, \
+    chc \
+  ) \
+{ \
+    /* Dispatch on the domain of x: pastes to bli_rabval2ims or bli_cabval2ims. */ \
+    PASTEMAC(dx,abval2ims) \
+    ( \
+      dx, px, xr, xi, \
+      dy, py, yr, yi, \
+      chc \
+    ); \
+}
+
+// -- real-to-real domain implementation --
+// -- real-to-complex domain implementation --
+
+// (yr) = abs( xr );
+// (yi) = 0;
+
+#define bli_rabval2ims( \
+    \
+    dx, px, xr, xi, \
+    dy, py, yr, yi, \
+    chc \
+  ) \
+{ \
+    PASTEMAC(dy,assigns) \
+    ( \
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,abs)( \
+          PASTEMAC(px,chc,tcast)(xr) \
+        ) \
+      ), \
+      PASTEMAC(py,zero), \
+      yr, \
+      yi \
+    ); \
+}
+
+// -- complex-to-real domain implementation --
+// -- complex-to-complex domain implementation --
+
+// NOTE: Instead of defining abval2 in terms of bli_?hypot(), we use an
+// alternate definition that can avoid overflow in the final result due
+// to overflow in the intermediate results (e.g. xr * xr and xi * xi).
+
+// xmaxr = maxabs( xr, xi );
+// if ( xmaxr == 0.0 && !isnan( xr ) && !isnan( xi ) ) mag = 0.0;
+// else mag = sqrt( xmaxr ) *
+//            sqrt( ( xr / xmaxr ) * xr +
+//                  ( xi / xmaxr ) * xi );
+// yr = mag;
+// yi = 0.0;
+
+#define bli_cabval2ims( \
+    \
+    dx, px, xr, xi, \
+    dy, py, yr, yi, \
+    chc \
+  ) \
+{ \
+    PASTEMAC(ro,declinits) \
+    ( \
+      px, \
+      PASTEMAC(px,maxabs)(xr,xi), \
+      xmaxr \
+    ) \
+    PASTEMAC(dy,assigns) \
+    ( \
+      ( PASTEMAC(teq0s)(px,xmaxr) && \
+        !PASTEMAC(px,isnan)(xi) && \
+        !PASTEMAC(px,isnan)(xr) \
+        ? PASTEMAC(py,zero) \
+        : PASTEMAC(chc,py,tcast)( \
+            PASTEMAC(chc,mul)( \
+              PASTEMAC(chc,sqrt)( \
+                PASTEMAC(px,chc,tcast)(xmaxr) \
+              ), \
+              PASTEMAC(chc,sqrt)( \
+                PASTEMAC(chc,add)( \
+                  PASTEMAC(chc,mul)( \
+                    PASTEMAC(px,chc,tcast)(xr), \
+                    PASTEMAC(chc,div)( \
+                      PASTEMAC(px,chc,tcast)(xr), \
+                      PASTEMAC(px,chc,tcast)(xmaxr) \
+                    ) \
+                  ), \
+                  PASTEMAC(chc,mul)( \
+                    PASTEMAC(px,chc,tcast)(xi), \
+                    PASTEMAC(chc,div)( \
+                      PASTEMAC(px,chc,tcast)(xi), \
+                      PASTEMAC(px,chc,tcast)(xmaxr) \
+                    ) \
+                  ) \
+                ) \
+              ) \
+            ) \
+          ) \
+      ), \
+      PASTEMAC(py,zero), \
+      yr, \
+      yi \
+    ); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// tabval2s: forwards the dom/prec/real/imag of x and y, plus prec of chc, to bli_tabval2ims.
+#define bli_tabval2s( chx, chy, chc, x, y ) \
+    bli_tabval2ims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(chx,imag)(x), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Exposed real/imaginary --
+
+// tabval2ris: same as tabval2s, but the caller passes xr, xi, yr, yi directly.
+#define bli_tabval2ris( chx, chy, chc, xr, xi, yr, yi ) \
+    bli_tabval2ims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      xr, \
+      xi, \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Higher-level static functions --------------------------------------------
+
+// -- Notes --------------------------------------------------------------------
+
+// -- Domain cases --
+
+// r r
+// (yr) :=
abs(xr);
+// (yi) xx 0 ;
+
+// r c
+// (yr) := sqrt(s) * sqrt( ( xr / s ) * xr + ( xi / s ) * xi );
+// (yi) xx 0 ;
+
+// c r
+// (yr) := abs(xr);
+// (yi) := 0 ;
+
+// c c
+// (yr) := sqrt(s) * sqrt( ( xr / s ) * xr + ( xi / s ) * xi );
+// (yi) := 0 ;
+
+#endif
+
diff --git a/frame/include/level0/bli_tadd3s.h b/frame/include/level0/bli_tadd3s.h
new file mode 100644
index 0000000000..d3c700d699
--- /dev/null
+++ b/frame/include/level0/bli_tadd3s.h
@@ -0,0 +1,190 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2014, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef BLIS_TADD3S_H
+#define BLIS_TADD3S_H
+
+// -- Implementation macro -----------------------------------------------------
+
+// (zr) = (yr) + (xr);
+// (zi) = (yi) + (xi);
+
+#define bli_tadd3ims( \
+    \
+    dx, px, xr, xi, \
+    dy, py, yr, yi, \
+    dz, pz, zr, zi, \
+    chc \
+  ) \
+{ \
+    PASTEMAC(dz,assigns) \
+    ( \
+      PASTEMAC(chc,pz,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(py,chc,tcast)(yr), \
+          PASTEMAC(px,chc,tcast)(xr) \
+        ) \
+      ),\
+      PASTEMAC(chc,pz,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(py,chc,tcast)(yi), \
+          PASTEMAC(px,chc,tcast)(xi) \
+        ) \
+      ), \
+      zr, \
+      zi \
+    ); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// tadd3s: z := y + x; forwards the dom/prec/real/imag of x, y and z to bli_tadd3ims.
+#define bli_tadd3s( chx, chy, chz, chc, x, y, z ) \
+    bli_tadd3ims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(chx,imag)(x), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chz,dom), \
+      PASTEMAC(chz,prec), \
+      PASTEMAC(chz,real)(z), \
+      PASTEMAC(chz,imag)(z), \
+      PASTEMAC(chc,prec) \
+    )
+
+// tadd3s unit test (exercises the three-operand bli_tadd3s, not the two-operand bli_tadds)
+#undef GENTFUNC
+#define GENTFUNC( ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc, opname ) \
+UNIT_TEST(chx,chy,chz,chc,opname) \
+( \
+    for ( auto x : test_values() ) \
+    for ( auto y : test_values() ) \
+    { \
+        auto z0 = convert( convert_prec( x ) + \
+                           convert_prec( y ) ); \
+\
+        INFO( "x: " << x ); \
+        INFO( "y: " << y ); \
+\
+        ctypez z; \
+        bli_tadd3s( chx,chy,chz,chc, x, y, z ); \
+\
+        INFO( "z (C++): " << z0 ); \
+        INFO( "z (BLIS): " << z ); \
+\
+        check( z, z0 ); \
+    } \
+)
+
+// tadd3js: like tadd3s, but the imaginary part of x is passed through the neg macro first.
+#define bli_tadd3js( chx, chy, chz, chc, x, y, z ) \
+    bli_tadd3ims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        PASTEMAC(chx,imag)(x) \
+      ), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chz,dom), \
+      PASTEMAC(chz,prec), \
+      PASTEMAC(chz,real)(z), \
+      PASTEMAC(chz,imag)(z), \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Exposed real/imaginary --
+
+// tadd3ris: same as tadd3s, but the caller passes xr, xi, yr, yi, zr, zi directly.
+#define bli_tadd3ris( chx, chy, chz, chc, xr, xi, yr, yi, zr, zi ) \
+    bli_tadd3ims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      xr, \
+      xi, \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chz,dom), \
+      PASTEMAC(chz,prec), \
+      zr, \
+      zi, \
+      PASTEMAC(chc,prec) \
+    )
+
+// tadd3jris: same as tadd3js, but the caller passes the real/imaginary parts directly.
+#define bli_tadd3jris( chx, chy, chz, chc, xr, xi, yr, yi, zr, zi ) \
+    bli_tadd3ims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      xr, \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        xi ), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chz,dom), \
+      PASTEMAC(chz,prec), \
+      zr, \
+      zi, \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Notes --------------------------------------------------------------------
+
+// -- Domain cases --
+
+// Every combination of operand domains is written with the same formula:
+// (zr) := (yr) + (xr);
+// (zi) := (yi) + (xi);
+// NOTE(review): how a real-domain operand's imaginary part is supplied or
+// discarded is decided by the real/imag/assigns macros defined elsewhere.
+
+#endif
+
diff --git a/frame/include/level0/bli_tadds.h b/frame/include/level0/bli_tadds.h
new file mode 100644
index 0000000000..a49e8de07e
--- /dev/null
+++ b/frame/include/level0/bli_tadds.h
@@ -0,0 +1,176 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+#ifndef BLIS_TADDS_H
+#define BLIS_TADDS_H
+
+// -- Implementation macro -----------------------------------------------------
+
+// (yr) = (yr) + (xr);   (operands go through the p?->chc tcast, the sum through chc->py)
+// (yi) = (yi) + (xi);
+
+#define bli_taddims( \
+    \
+    dx, px, xr, xi, \
+    dy, py, yr, yi, \
+    chc \
+  ) \
+{ \
+    PASTEMAC(dy,assigns) \
+    ( \
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(py,chc,tcast)(yr), \
+          PASTEMAC(px,chc,tcast)(xr) \
+        ) \
+      ),\
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(py,chc,tcast)(yi), \
+          PASTEMAC(px,chc,tcast)(xi) \
+        ) \
+      ), \
+      yr, \
+      yi \
+    ); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// tadds: y := y + x; forwards the dom/prec/real/imag of x and y to bli_taddims.
+#define bli_tadds( chx, chy, chc, x, y ) \
+    bli_taddims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(chx,imag)(x), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec) \
+    )
+
+// taddjs: like tadds, but the imaginary part of x is passed through the neg macro first.
+#define bli_taddjs( chx, chy, chc, x, y ) \
+    bli_taddims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        PASTEMAC(chx,imag)(x) \
+      ), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Exposed real/imaginary --
+
+// taddris: same as tadds, but the caller passes xr, xi, yr, yi directly.
+#define bli_taddris( chx, chy, chc, xr, xi, yr, yi ) \
+    bli_taddims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      xr, \
+      xi, \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec) \
+    )
+
+// taddjris: same as taddjs, but the caller passes xr, xi, yr, yi directly.
+#define bli_taddjris( chx, chy, chc, xr, xi, yr, yi ) \
+    bli_taddims \
+    ( \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      xr, \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        xi ), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Higher-level static functions --------------------------------------------
+
+// -- mxn --
+
+// tadds_mxn: applies bli_tadds to every (ii,jj) element pair of an m x n x and y. NOTE(review): xij/yij are restrict-qualified, i.e. x and y are assumed not to overlap -- confirm against the commit's aliasing guarantee.
+#define bli_tadds_mxn( chx, chy, chc, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
+{ \
+    for ( dim_t jj = 0; jj < (n); ++jj ) \
+    for ( dim_t ii = 0; ii < (m); ++ii ) \
+    { \
+        PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+        PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+        bli_tadds( chx,chy,chc, *xij, *yij ); \
+    } \
+}
+
+// -- Notes --------------------------------------------------------------------
+
+// -- Domain cases --
+
+// r r
+// (yr) += (xr);
+// (yi) xx 0 ;
+
+// r c
+// (yr) += (xr);
+// (yi) xx (xi);
+
+// c r
+// (yr) += (xr);
+// (yi) += 0 ;
+
+// c c
+// (yr) += (xr);
+// (yi) += (xi);
+
+#endif
+
diff --git a/frame/include/level0/bli_taxpbys.h b/frame/include/level0/bli_taxpbys.h
new file mode 100644
index 0000000000..76dd8d6b3d
--- /dev/null
+++ b/frame/include/level0/bli_taxpbys.h
@@ -0,0 +1,264 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2014, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef BLIS_TAXPBYS_H
+#define BLIS_TAXPBYS_H
+
+// -- Implementation macro -----------------------------------------------------
+
+// (yorigr) := (yr)   (y is copied first so both outputs are built from the original y)
+// (yorigi) := (yi)
+// (yr) := (ar) * (xr) - (ai) * (xi) + (br) * (yorigr) - (bi) * (yorigi);
+// (yi) := (ai) * (xr) + (ar) * (xi) + (bi) * (yorigr) + (br) * (yorigi);
+
+#define bli_taxpbyims( \
+    \
+    da, pa, ar, ai, \
+    dx, px, xr, xi, \
+    db, pb, br, bi, \
+    dy, py, yr, yi, \
+    chc \
+  ) \
+{ \
+    PASTEMAC(dy,declinits)( py, yr, yi, yorigr, yorigi ) \
+    PASTEMAC(dy,assigns) \
+    ( \
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(chc,sub)( \
+            PASTEMAC(da,dx,termrr)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pa,chc,tcast)(ar), \
+                PASTEMAC(px,chc,tcast)(xr) \
+              ) \
+            ), \
+            PASTEMAC(da,dx,termii)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pa,chc,tcast)(ai), \
+                PASTEMAC(px,chc,tcast)(xi) \
+              ) \
+            ) \
+          ), \
+          PASTEMAC(chc,sub)( \
+            PASTEMAC(db,dy,termrr)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pb,chc,tcast)(br), \
+                PASTEMAC(py,chc,tcast)(yorigr) \
+              ) \
+            ), \
+            PASTEMAC(db,dy,termii)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pb,chc,tcast)(bi), \
+                PASTEMAC(py,chc,tcast)(yorigi) \
+              ) \
+            ) \
+          ) \
+        ) \
+      ),\
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(chc,add)( \
+            PASTEMAC(da,dx,termir)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pa,chc,tcast)(ai), \
+                PASTEMAC(px,chc,tcast)(xr) \
+              ) \
+            ), \
+            PASTEMAC(da,dx,termri)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pa,chc,tcast)(ar), \
+                PASTEMAC(px,chc,tcast)(xi) \
+              ) \
+            ) \
+          ), \
+          PASTEMAC(chc,add)( \
+            PASTEMAC(db,dy,termir)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pb,chc,tcast)(bi), \
+                PASTEMAC(py,chc,tcast)(yorigr) \
+              ) \
+            ), \
+            PASTEMAC(db,dy,termri)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pb,chc,tcast)(br), \
+                PASTEMAC(py,chc,tcast)(yorigi) \
+              ) \
+            ) \
+          ) \
+        ) \
+      ), \
+      yr, \
+      yi \
+    ); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// taxpbys: y := a * x + b * y; forwards the dom/prec/real/imag of a, x, b and y to bli_taxpbyims.
+#define bli_taxpbys( cha, chx, chb, chy, chc, a, x, b, y ) \
+    bli_taxpbyims \
+    ( \
+      PASTEMAC(cha,dom), \
+      PASTEMAC(cha,prec), \
+      PASTEMAC(cha,real)(a), \
+      PASTEMAC(cha,imag)(a), \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(chx,imag)(x), \
+      PASTEMAC(chb,dom), \
+      PASTEMAC(chb,prec), \
+      PASTEMAC(chb,real)(b), \
+      PASTEMAC(chb,imag)(b), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec) \
+    )
+
+// taxpbyjs: like taxpbys, but the imaginary part of x is passed through the neg macro first.
+#define bli_taxpbyjs( cha, chx, chb, chy, chc, a, x, b, y ) \
+    bli_taxpbyims \
+    ( \
+      PASTEMAC(cha,dom), \
+      PASTEMAC(cha,prec), \
+      PASTEMAC(cha,real)(a), \
+      PASTEMAC(cha,imag)(a), \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        PASTEMAC(chx,imag)(x) \
+      ), \
+      PASTEMAC(chb,dom), \
+      PASTEMAC(chb,prec), \
+      PASTEMAC(chb,real)(b), \
+      PASTEMAC(chb,imag)(b), \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Exposed real/imaginary --
+
+// taxpbyris: same as taxpbys, but the caller passes the real/imaginary parts directly.
+#define bli_taxpbyris( cha, chx, chb, chy, chc, ar, ai, xr, xi, br, bi, yr, yi ) \
+    bli_taxpbyims \
+    ( \
+      PASTEMAC(cha,dom), \
+      PASTEMAC(cha,prec), \
+      ar, \
+      ai, \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      xr, \
+      xi, \
+      PASTEMAC(chb,dom), \
+      PASTEMAC(chb,prec), \
+      br, \
+      bi, \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec) \
+    )
+
+// taxpbyjris: same as taxpbyjs, but the caller passes the real/imaginary parts directly.
+#define bli_taxpbyjris( cha, chx, chb, chy, chc, ar, ai, xr, xi, br, bi, yr, yi ) \
+    bli_taxpbyims \
+    ( \
+      PASTEMAC(cha,dom), \
+      PASTEMAC(cha,prec), \
+      ar, \
+      ai, \
+      PASTEMAC(chx,dom), \
+      PASTEMAC(chx,prec), \
+      xr, \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        xi ), \
+      PASTEMAC(chb,dom), \
+      PASTEMAC(chb,prec), \
+      br, \
+      bi, \
+      PASTEMAC(chy,dom), \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec) \
+    )
+
+// -- Higher-level static functions --------------------------------------------
+
+// -- mxn --
+
+// taxpbys_mxn: applies bli_taxpbys to every (ii,jj) element pair of an m x n x and y; alpha and beta are pointers.
+#define bli_taxpbys_mxn( cha, chx, chb, chy, chc, m, n, alpha, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
+{ \
+    /* m, n, x and y are parenthesized below so expression arguments expand safely. */ \
+    /* If beta is zero, y is not read (in case it has infs or NaNs); bli_tscal2s_mxn overwrites it (presumably with alpha * x). */ \
+    if ( bli_teq0s( chb, *(beta) ) ) \
+    { \
+        bli_tscal2s_mxn( cha, chx, chy, chc, BLIS_NO_CONJUGATE, m, n, alpha, x, rs_x, cs_x, y, rs_y, cs_y ); \
+    } \
+    else \
+    { \
+        for ( dim_t jj = 0; jj < (n); ++jj ) \
+        for ( dim_t ii = 0; ii < (m); ++ii ) \
+        { \
+            PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+            PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+            bli_taxpbys( cha,chx,chb,chy,chc, *(alpha), *xij, *(beta), *yij ); \
+        } \
+    } \
+}
+
+// -- Notes --------------------------------------------------------------------
+
+#endif
+
diff --git a/frame/include/level0/bli_taxpys.h b/frame/include/level0/bli_taxpys.h
new file mode 100644
index 0000000000..9dec13ee17
--- /dev/null
+++ b/frame/include/level0/bli_taxpys.h
@@ -0,0 +1,224 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_TAXPYS_H +#define BLIS_TAXPYS_H + +// -- Implementation macro ----------------------------------------------------- + +// (yr) += (ar) * (xr) - (ai) * (xi); +// (yi) += (ai) * (xr) + (ar) * (xi); + +#define bli_taxpyims( \ + \ + da, pa, ar, ai, \ + dx, px, xr, xi, \ + dy, py, yr, yi, \ + chc \ + ) \ +{ \ + PASTEMAC(dy,assigns) \ + ( \ + PASTEMAC(chc,py,tcast)( \ + PASTEMAC(chc,add)( \ + PASTEMAC(py,chc,tcast)(yr), \ + PASTEMAC(chc,sub)( \ + PASTEMAC(da,dx,termrr)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + PASTEMAC(px,chc,tcast)(xr) \ + ) \ + ), \ + PASTEMAC(da,dx,termii)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ai), \ + PASTEMAC(px,chc,tcast)(xi) \ + ) \ + ) \ + ) \ + ) \ + ),\ + PASTEMAC(chc,py,tcast)( \ + PASTEMAC(chc,add)( \ + PASTEMAC(py,chc,tcast)(yi), \ + PASTEMAC(chc,add)( \ + PASTEMAC(da,dx,termir)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ai), \ + PASTEMAC(px,chc,tcast)(xr) \ + ) \ + ), \ + PASTEMAC(da,dx,termri)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + PASTEMAC(px,chc,tcast)(xi) \ + ) \ + ) \ + ) \ + ) \ + ), \ + yr, \ + yi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// taxpys +#define bli_taxpys( cha, chx, chy, chc, a, x, y ) \ + bli_taxpyims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + PASTEMAC(cha,real)(a), \ + PASTEMAC(cha,imag)(a), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y), \ + PASTEMAC(chc,prec) \ + ) + +// taxpyjs +#define bli_taxpyjs( cha, chx, chy, chc, a, x, y ) \ + bli_taxpyims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + PASTEMAC(cha,real)(a), \ + PASTEMAC(cha,imag)(a), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + 
PASTEMAC(PASTEMAC(chx,prec),neg)( \ + PASTEMAC(chx,imag)(x) \ + ), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y), \ + PASTEMAC(chc,prec) \ + ) + +// -- Exposed real/imaginary -- + +// taxpyris +#define bli_taxpyris( cha, chx, chy, chc, ar, ai, xr, xi, yr, yi ) \ + bli_taxpyims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + ar, \ + ai, \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi, \ + PASTEMAC(chc,prec) \ + ) + +// taxpyjris +#define bli_taxpyjris( cha, chx, chy, chc, ar, ai, xr, xi, yr, yi ) \ + bli_taxpyims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + ar, \ + ai, \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + PASTEMAC(PASTEMAC(chx,prec),neg)( \ + xi ), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi, \ + PASTEMAC(chc,prec) \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r r +// (yr) += (ar) * (xr) - 0 * 0 ; +// (yi) xx 0 * (xr) + (ar) * 0 ; + +// r r c +// (yr) += (ar) * (xr) - 0 * (xi); +// (yi) xx 0 * (xr) + (ar) * (xi); + +// r c r +// (yr) += (ar) * (xr) - (ai) * 0 ; +// (yi) xx (ai) * (xr) + (ar) * 0 ; + +// r c c +// (yr) += (ar) * (xr) - (ai) * (xi); +// (yi) xx (ai) * (xr) + (ar) * (xi); + +// c r r +// (yr) += (ar) * (xr) - 0 * 0 ; +// (yi) += 0 * (xr) + (ar) * 0 ; + +// c r c +// (yr) += (ar) * (xr) - 0 * (xi); +// (yi) += 0 * (xr) + (ar) * (xi); + +// c c r +// (yr) += (ar) * (xr) - (ai) * 0 ; +// (yi) += (ai) * (xr) + (ar) * 0 ; + +// c c c +// (yr) += (ar) * (xr) - (ai) * (xi); +// (yi) += (ai) * (xr) + (ar) * (xi); + +#endif + diff --git a/frame/include/level0/1r/bli_scal21rs.h b/frame/include/level0/bli_tconjs.h similarity index 60% rename from frame/include/level0/1r/bli_scal21rs.h rename to 
frame/include/level0/bli_tconjs.h index 2f0590a624..5706f68862 100644 --- a/frame/include/level0/1r/bli_scal21rs.h +++ b/frame/include/level0/bli_tconjs.h @@ -32,34 +32,57 @@ */ -#ifndef BLIS_SCAL21RS_H -#define BLIS_SCAL21RS_H +#ifndef BLIS_TCONJS_H +#define BLIS_TCONJS_H -// scal21rs +// -- Implementation macro ----------------------------------------------------- -#define bli_cscscal21rs( a, x, yr, yi ) \ -{ \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), yr, yi ); \ -} +// (xr) := (xr) +// (xi) := -(xi) -#define bli_cccscal21rs( a, x, yr, yi ) \ +#define bli_tconjims( \ + \ + dx, px, xr, xi \ + ) \ { \ - bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), yr, yi ); \ + PASTEMAC(dx,assigns) \ + ( \ + xr, \ + PASTEMAC(px,neg)(xi),\ + xr, \ + xi \ + ); \ } -#define bli_zdzscal21rs( a, x, yr, yi ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), yr, yi ); \ -} +// -- API macros --------------------------------------------------------------- -#define bli_zzzscal21rs( a, x, yr, yi ) \ -{ \ - bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), yr, yi ); \ -} +// -- Consolidated -- + +// tconjs +#define bli_tconjs( chx, x ) \ + bli_tconjims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x) \ + ) + +// -- Exposed real/imaginary -- + +// tconjris +#define bli_tconjris( chx, xr, xi ) \ + bli_tconjims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi \ + ) +// -- Higher-level static functions -------------------------------------------- -#define bli_cscal21rs( a, x, yr, yi ) bli_cccscal21rs( a, x, yr, yi ) -#define bli_zscal21rs( a, x, yr, yi ) bli_zzzscal21rs( a, x, yr, yi ) +// -- Notes -------------------------------------------------------------------- #endif diff --git a/frame/include/level0/bli_tcopycjs.h b/frame/include/level0/bli_tcopycjs.h new file mode 100644 index 0000000000..538cd5c124 --- 
/dev/null +++ b/frame/include/level0/bli_tcopycjs.h @@ -0,0 +1,122 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_TCOPYCJS_H +#define BLIS_TCOPYCJS_H + +// -- Implementation macro ----------------------------------------------------- + +// (yr) := (xr); +// (yi) := ( is_conj( conj ) ? 
-(xi) : (xi) ); + +#define bli_tcopycjims( \ + \ + conj, \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(dy,assigns) \ + ( \ + PASTEMAC(px,py,tcast)(xr), \ + ( bli_is_conj( conj ) ? PASTEMAC(py,neg)( \ + PASTEMAC(px,py,tcast)(xi) \ + ) \ + : PASTEMAC(px,py,tcast)(xi) \ + ), \ + yr, \ + yi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tcopycjs +#define bli_tcopycjs( chx, chy, conj, x, y ) \ + bli_tcopycjims \ + ( \ + conj, \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) + +// -- Exposed real/imaginary -- + +// tcopycjris +#define bli_tcopycjris( chx, chy, conj, xr, xi, yr, yi ) \ + bli_tcopycjims \ + ( \ + conj, \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r +// (yr) := (xr); +// (yi) xx 0 ; + +// r c +// (yr) := (xr); +// (yi) xx (xi); + +// c r +// (yr) := (xr); +// (yi) := 0 ; + +// c c +// (yr) := (xr); +// (yi) := (xi); + +#endif + diff --git a/frame/include/level0/bli_tcopynzs.h b/frame/include/level0/bli_tcopynzs.h new file mode 100644 index 0000000000..b777da9d4f --- /dev/null +++ b/frame/include/level0/bli_tcopynzs.h @@ -0,0 +1,191 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_TCOPYNZS_H +#define BLIS_TCOPYNZS_H + +// -- Implementation macro ----------------------------------------------------- + +// (yr) := (xr); +// if ( is_complex( x ) ) +// (yi) := (xi); + +#define bli_tcopynzims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(dx,dy,copynzims) \ + ( \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ); \ +} + +// -- real-to-real domain implementation -- + +#define bli_rrcopynzims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(r,assigns) \ + ( \ + PASTEMAC(px,py,tcast)(xr), \ + PASTEMAC(px,py,tcast)(xi), \ + yr, \ + yi \ + ); \ +} + +// -- complex-to-real domain implementation -- +// -- real-to-complex domain implementation -- + +// NOTE: Normally, the real-to-complex case would take place in the complex +// domain (in that an implicit zero would be copied to y.imag), but since +// this is copynz, we avoid updating the imaginary parts of complex y when +// x is real. Thus, real-to-complex ends up getting implemented the same as +// real-to-real (and complex-to-real). 
+ +#define bli_rccopynzims bli_rrcopynzims +#define bli_crcopynzims bli_rrcopynzims + +// -- complex-to-complex domain implementation -- + +#define bli_cccopynzims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(c,assigns) \ + ( \ + PASTEMAC(px,py,tcast)(xr), \ + PASTEMAC(px,py,tcast)(xi), \ + yr, \ + yi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tcopynzs +#define bli_tcopynzs( chx, chy, x, y ) \ + bli_tcopynzims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) + +// tcopyjnzs +#define bli_tcopyjnzs( chx, chy, x, y ) \ + bli_tcopynzims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(PASTEMAC(chx,prec),neg)( \ + PASTEMAC(chx,imag)(x) \ + ), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) + +// -- Exposed real/imaginary -- + +// tcopynzris +#define bli_tcopynzris( chx, chy, xr, xi, yr, yi ) \ + bli_tcopynzims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// tcopyjnzris +#define bli_tcopyjnzris( chx, chy, xr, xi, yr, yi ) \ + bli_tcopynzims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + PASTEMAC(PASTEMAC(chx,prec),neg)( \ + xi ), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r +// (yr) := (xr); +// (yi) xx 0 ; + +// r c +// (yr) := (xr); +// (yi) xx (xi); + +// c r +// (yr) := (xr); +// (yi) xx 0 ; // NOTE: This is what copynzs does differently from copys. 
+ +// c c +// (yr) := (xr); +// (yi) := (xi); + +#endif + diff --git a/frame/include/level0/bli_tcopys.h b/frame/include/level0/bli_tcopys.h new file mode 100644 index 0000000000..e65403787d --- /dev/null +++ b/frame/include/level0/bli_tcopys.h @@ -0,0 +1,246 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_TCOPYS_H +#define BLIS_TCOPYS_H + +// -- Implementation macro ----------------------------------------------------- + +// (yr) := (xr); +// (yi) := (xi); + +#define bli_tcopyims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(dy,assigns) \ + ( \ + PASTEMAC(px,py,tcast)(xr), \ + PASTEMAC(px,py,tcast)(xi), \ + yr, \ + yi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tcopys +#define bli_tcopys( chx, chy, x, y ) \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) + +// tcopyjs +#define bli_tcopyjs( chx, chy, x, y ) \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(PASTEMAC(chx,prec),neg)( \ + PASTEMAC(chx,imag)(x) \ + ), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) + +// -- Exposed real/imaginary -- + +// tcopyris +#define bli_tcopyris( chx, chy, xr, xi, yr, yi ) \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// tcopyjris +#define bli_tcopyjris( chx, chy, xr, xi, yr, yi ) \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + PASTEMAC(PASTEMAC(chx,prec),neg)( \ + xi ), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// -- 1e / 1r -- + +// tcopy1es +#define bli_tcopy1es( chx, chy, x, yri, yir ) \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(yri), \ + PASTEMAC(chy,imag)(yri) \ + ); \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + 
PASTEMAC(PASTEMAC(chx,prec),neg)( \ + PASTEMAC(chx,imag)(x) \ + ), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(yir), \ + PASTEMAC(chy,imag)(yir) \ + ) + +// tcopyj1es +#define bli_tcopyj1es( chx, chy, x, yri, yir ) \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(PASTEMAC(chx,prec),neg)( \ + PASTEMAC(chx,imag)(x) \ + ), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(yri), \ + PASTEMAC(chy,imag)(yri) \ + ); \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(yir), \ + PASTEMAC(chy,imag)(yir) \ + ) + +// tcopy1rs +#define bli_tcopy1rs( chx, chy, x, yr, yi ) \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// tcopyj1rs +#define bli_tcopyj1rs( chx, chy, x, yr, yi ) \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(PASTEMAC(chx,prec),neg)( \ + PASTEMAC(chx,imag)(x) \ + ), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- mxn -- + +#define bli_tcopys_mxn( chx, chy, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ +{ \ + for ( dim_t jj = 0; jj < (n); ++jj ) \ + for ( dim_t ii = 0; ii < (m); ++ii ) \ + { \ + PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \ +\ + bli_tcopys( chx,chy, *xij, *yij ); \ + } \ +} + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r +// (yr) := (xr); +// (yi) xx 0 ; + +// r c +// (yr) := (xr); +// (yi) xx (xi); + +// c r 
+// (yr) := (xr); +// (yi) := 0 ; + +// c c +// (yr) := (xr); +// (yi) := (xi); + +#endif + diff --git a/frame/include/level0/bli_tdots.h b/frame/include/level0/bli_tdots.h new file mode 100644 index 0000000000..e1f9e76075 --- /dev/null +++ b/frame/include/level0/bli_tdots.h @@ -0,0 +1,109 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_TDOTS_H +#define BLIS_TDOTS_H + +// -- Implementation macro ----------------------------------------------------- + +// (cr) += (ar) * (br) - (ai) * (bi); +// (ci) += (ai) * (br) + (ar) * (bi); + +#define bli_tdotims bli_taxpyims + +// -- API macros --------------------------------------------------------------- + +// NOTE: The first two operands must be swapped, because axpy conjugates +// its second operand (x), while dot conjugates its first operand (x). + +// -- Consolidated -- + +// tdots +#define bli_tdots( chx,chy,cha,chc, x, y, a ) \ + bli_taxpys( chy,chx,cha,chc, y, x, a ) + +// tdotjs +#define bli_tdotjs( chx,chy,cha,chc, x, y, a ) \ + bli_taxpyjs( chy,chx,cha,chc, y, x, a ) + +// -- Exposed real/imaginary -- + +// tdotris +#define bli_tdotris( chx,chy,cha,chc, xr, xi, yr, yi, ar, ai ) \ + bli_taxpyris( chy,chx,cha,chc, yr, yi, xr, xi, ar, ai ) + +// tdotjris +#define bli_tdotjris( chx,chy,cha,chc, xr, xi, yr, yi, ar, ai ) \ + bli_taxpyjris( chy,chx,cha,chc, yr, yi, xr, xi, ar, ai ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r r +// (yr) += (ar) * (xr) - 0 * 0 ; +// (yi) xx 0 * (xr) + (ar) * 0 ; + +// r r c +// (yr) += (ar) * (xr) - 0 * (xi); +// (yi) xx 0 * (xr) + (ar) * (xi); + +// r c r +// (yr) += (ar) * (xr) - (ai) * 0 ; +// (yi) xx (ai) * (xr) + (ar) * 0 ; + +// r c c +// (yr) += (ar) * (xr) - (ai) * (xi); +// (yi) xx (ai) * (xr) + (ar) * (xi); + +// c r r +// (yr) += (ar) * (xr) - 0 * 0 ; +// (yi) += 0 * (xr) + (ar) * 0 ; + +// c r c +// (yr) += (ar) * (xr) - 0 * (xi); +// (yi) += 0 * (xr) + (ar) * (xi); + +// c c r +// (yr) += (ar) * (xr) - (ai) * 0 ; +// (yi) += (ai) * (xr) + (ar) * 0 ; + +// c c c +// (yr) += (ar) * (xr) - (ai) * (xi); +// (yi) += (ai) * (xr) + (ar) * (xi); + +#endif + diff --git a/frame/include/level0/bli_teqs.h b/frame/include/level0/bli_teqs.h new 
file mode 100644 index 0000000000..f915d1a19d --- /dev/null +++ b/frame/include/level0/bli_teqs.h @@ -0,0 +1,154 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_TEQS_H +#define BLIS_TEQS_H + +// -- Implementation macro ----------------------------------------------------- + +// (xr) == (yr) && (xi) == (yi) + +#define bli_teqims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi, \ + chc \ + ) \ + ( PASTEMAC(PASTEMAC(chc,prec),eq)( PASTEMAC(px,chc,tcast)(xr), \ + PASTEMAC(py,chc,tcast)(yr) ) && \ + PASTEMAC(PASTEMAC(chc,prec),eq)( PASTEMAC(px,chc,tcast)(xi), \ + PASTEMAC(py,chc,tcast)(yi) ) ) + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// teqs +#define bli_teqs( chx, chy, chc, x, y ) \ + bli_teqims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y), \ + PASTEMAC(chc,prec) \ + ) + +// -- Exposed real/imaginary -- + +// teqris +#define bli_teqris( chx, chy, chc, xr, xi, yr, yi ) \ + bli_teqims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi, \ + PASTEMAC(chc,prec) \ + ) + +// -- Convenience macros ------------------------------------------------------- + +// -- Exposed real/imaginary -- + +#define bli_teq1ris( chx, xr, xi ) \ + bli_teqris \ + ( \ + chx, chx, chx, \ + xr, \ + xi, \ + PASTEMAC(PASTEMAC(chx,prec),one), \ + PASTEMAC(PASTEMAC(chx,prec),zero) \ + ) + +#define bli_teq0ris( chx, xr, xi ) \ + bli_teqris \ + ( \ + chx, chx, chx, \ + xr, \ + xi, \ + PASTEMAC(PASTEMAC(chx,prec),zero), \ + PASTEMAC(PASTEMAC(chx,prec),zero) \ + ) + +#define bli_teqm1ris( chx, xr, xi ) \ + bli_teqris \ + ( \ + chx, chx, chx, \ + xr, \ + xi, \ + PASTEMAC(PASTEMAC(chx,prec),mone), \ + PASTEMAC(PASTEMAC(chx,prec),zero) \ + ) + +// -- Consolidated -- + +#define bli_teq1s( chx, x ) \ + bli_teq1ris \ + ( \ + chx, \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x) \ + ) + +#define bli_teq0s( chx, x ) \ + bli_teq0ris \ + ( \ + 
chx, \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x) \ + ) + +#define bli_teqm1s( chx, x ) \ + bli_teqm1ris \ + ( \ + chx, \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x) \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- + +#endif + diff --git a/frame/include/level0/bli_fprints.h b/frame/include/level0/bli_tfprints.h similarity index 52% rename from frame/include/level0/bli_fprints.h rename to frame/include/level0/bli_tfprints.h index c52cddfc96..2616d1e3f9 100644 --- a/frame/include/level0/bli_fprints.h +++ b/frame/include/level0/bli_tfprints.h @@ -32,37 +32,82 @@ */ -#ifndef BLIS_FPRINTS_H -#define BLIS_FPRINTS_H +#ifndef BLIS_TFPRINTS_H +#define BLIS_TFPRINTS_H -// prints +// -- Implementation macro ----------------------------------------------------- -#define bli_sfprints( file, spec, x ) \ -{ \ - fprintf( file, spec, (x) ); \ -} -#define bli_dfprints( file, spec, x ) \ +// -- real domain implementation -- + +#define bli_rfprintims( \ + \ + file, spec, \ + dx, px, xr, xi \ + ) \ { \ - fprintf( file, spec, (x) ); \ + fprintf( file, spec, xr ); \ } -#define bli_cfprints( file, spec, x ) \ -{ \ - fprintf( file, spec, bli_creal(x) ); \ - fprintf( file, " + " ); \ - fprintf( file, spec, bli_cimag(x) ); \ - fprintf( file, " " ); \ -} -#define bli_zfprints( file, spec, x ) \ + +// -- complex domain implementation -- + +#define bli_cfprintims( \ + \ + file, spec, \ + dx, px, xr, xi \ + ) \ { \ - fprintf( file, spec, bli_zreal(x) ); \ + fprintf( file, spec, xr ); \ fprintf( file, " + " ); \ - fprintf( file, spec, bli_zimag(x) ); \ - fprintf( file, " " ); \ + fprintf( file, spec, xi ); \ + fprintf( file, "i" ); \ } -#define bli_ifprints( file, spec, x ) \ + +// -- general implementation -- + +#define bli_tfprintims( \ + \ + file, spec, \ + dx, px, xr, xi \ + ) \ { \ - fprintf( file, spec, (x) ); \ + PASTEMAC(dx,fprintims) \ + ( \ + file, 
spec, \ + dx, px, xr, xi \ + ); \ } +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tfprints +#define bli_tfprints( chx, file, spec, x ) \ + bli_tfprintims \ + ( \ + file, spec, \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x) \ + ) + +// -- Exposed real/imaginary -- + +// tfprintris +#define bli_tfprintris( chx, file, spec, xr, xi ) \ + bli_tfprintims \ + ( \ + file, spec, \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- #endif + diff --git a/frame/include/level0/bli_setrs.h b/frame/include/level0/bli_tgets.h similarity index 57% rename from frame/include/level0/bli_setrs.h rename to frame/include/level0/bli_tgets.h index 6a5b4a3f88..d052c8f235 100644 --- a/frame/include/level0/bli_setrs.h +++ b/frame/include/level0/bli_tgets.h @@ -32,45 +32,70 @@ */ -#ifndef BLIS_SETRS_H -#define BLIS_SETRS_H - -// setrs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. 
- -#define bli_sssetrs( xr, y ) { (y) = (xr); } -#define bli_dssetrs( xr, y ) { (y) = (xr); } - -#define bli_sdsetrs( xr, y ) { (y) = (xr); } -#define bli_ddsetrs( xr, y ) { (y) = (xr); } - -#ifndef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsetrs( xr, y ) { bli_creal(y) = (xr); } -#define bli_dcsetrs( xr, y ) { bli_creal(y) = (xr); } - -#define bli_szsetrs( xr, y ) { bli_zreal(y) = (xr); } -#define bli_dzsetrs( xr, y ) { bli_zreal(y) = (xr); } - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -#define bli_scsetrs( xr, y ) { (y) = (xr) + bli_cimag(y) * (I); } -#define bli_dcsetrs( xr, y ) { (y) = (xr) + bli_cimag(y) * (I); } - -#define bli_szsetrs( xr, y ) { (y) = (xr) + bli_zimag(y) * (I); } -#define bli_dzsetrs( xr, y ) { (y) = (xr) + bli_zimag(y) * (I); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_ssetrs( xr, y ) bli_sssetrs( xr, y ) -#define bli_dsetrs( xr, y ) bli_ddsetrs( xr, y ) -#define bli_csetrs( xr, y ) bli_scsetrs( xr, y ) -#define bli_zsetrs( xr, y ) bli_dzsetrs( xr, y ) - +#ifndef BLIS_TGETS_H +#define BLIS_TGETS_H + +// -- Implementation macro ----------------------------------------------------- + +// (yr) := (xr) +// (yi) := (xi) + +// NOTE: always assign the imaginary component, even for real y (hence the fixed complex-domain assigns below; dx and dy are not consulted) + +#define bli_tgetims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(c,assigns) \ + ( \ + PASTEMAC(px,py,tcast)(xr), \ + PASTEMAC(px,py,tcast)(xi), \ + yr, \ + yi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Hybrid -- + +// tgets: extract the real and imaginary parts of x (type chx) into the separate outputs yr and yi, cast from the precision of chx to that of chy +#define bli_tgets( chx, chy, x, yr, yi ) \ + bli_tgetims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r +// (yr) := 
(xr); +// (yi) := 0 ; + +// r c +// (yr) := (xr); +// (yi) := (xi); + +// c r +// (yr) := (xr); +// (yi) := 0 ; + +// c c +// (yr) := (xr); +// (yi) := (xi); #endif diff --git a/frame/include/level0/bli_tinverts.h b/frame/include/level0/bli_tinverts.h new file mode 100644 index 0000000000..ec8698298e --- /dev/null +++ b/frame/include/level0/bli_tinverts.h @@ -0,0 +1,219 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef BLIS_TINVERTS_H
+#define BLIS_TINVERTS_H
+
+// -- Implementation macro -----------------------------------------------------
+
+#define bli_tinvertims( \
+          \
+          dx, px, xr, xi, \
+          chc \
+        ) \
+{ /* forwards to the r or c implementation selected by the domain dx */ \
+	PASTEMAC(dx,invertims) \
+	( \
+	  dx, px, xr, xi, \
+	  chc \
+	); \
+}
+
+// -- real domain implementation --
+
+// (xr) = 1.0 / (xr);
+
+#define bli_rinvertims( \
+          \
+          dx, px, xr, xi, \
+          chc \
+        ) \
+{ /* the division is done in precision chc, then cast back to px */ \
+	PASTEMAC(r,assigns) \
+	( \
+	  PASTEMAC(chc,px,tcast)( \
+	    PASTEMAC(chc,div)( \
+	      PASTEMAC(chc,one), \
+	      PASTEMAC(px,chc,tcast)(xr) \
+	    ) \
+	  ),\
+	  PASTEMAC(px,zero), \
+	  xr, \
+	  xi \
+	); \
+}
+
+// -- complex domain implementation --
+
+// sr = maxabs( xr, xi );
+// xsr = xr / sr;
+// xsi = xi / sr;
+// tempr = xr * xsr + xi * xsi
+// xr = xsr / tempr;
+// xi = -xsi / tempr;
+
+#define bli_cinvertims( \
+          \
+          dx, px, xr, xi, \
+          chc \
+        ) \
+{ /* sr, xsr, xsi and tempr are block-local temporaries of precision chc */ \
+	PASTEMAC(ro,declinits) \
+	( \
+	  chc, \
+	  PASTEMAC(chc,maxabs)( \
+	    PASTEMAC(px,chc,tcast)(xr), \
+	    PASTEMAC(px,chc,tcast)(xi) \
+	  ), \
+	  sr \
+	) \
+	PASTEMAC(c,declinits) \
+	( \
+	  chc, \
+	  PASTEMAC(chc,div)( \
+	    PASTEMAC(px,chc,tcast)(xr), \
+	    sr \
+	  ), \
+	  PASTEMAC(chc,div)( \
+	    PASTEMAC(px,chc,tcast)(xi), \
+	    sr \
+	  ), \
+	  xsr, \
+	  xsi \
+	) \
+	PASTEMAC(ro,declinits) \
+	( \
+	  chc, \
+	  PASTEMAC(chc,add)( \
+	    PASTEMAC(chc,mul)( \
+	      PASTEMAC(px,chc,tcast)(xr), \
+	      xsr \
+	    ), \
+	    PASTEMAC(chc,mul)( \
+	      PASTEMAC(px,chc,tcast)(xi), \
+	      xsi \
+	    ) \
+	  ), \
+	  tempr \
+	) \
+	PASTEMAC(c,assigns) \
+	( \
+	  PASTEMAC(chc,px,tcast)( \
+	    PASTEMAC(chc,div)( \
+	      xsr, \
+	      tempr \
+	    ) \
+	  ),\
+	  PASTEMAC(chc,px,tcast)( \
+	    PASTEMAC(chc,div)( \
+	      PASTEMAC(PASTEMAC(chc,prec),neg)(xsi), \
+	      tempr \
+	    ) \
+	  ),\
+	  xr, \
+	  xi \
+	); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// tinverts: x := 1 / x in place, computed in the precision of chc
+#define bli_tinverts( chx, chc, x ) \
+	bli_tinvertims \
+	( \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(x), \
+	  PASTEMAC(chx,imag)(x), \
+	  PASTEMAC(chc,prec) \
+	)
+
+// -- Exposed real/imaginary --
+
+// tinvertris: same as tinverts, with the real part xr and imaginary part xi passed separately
+#define bli_tinvertris( chx, chc, xr, xi ) \
+	bli_tinvertims \
+	( \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  xr, \
+	  xi, \
+	  PASTEMAC(chc,prec) \
+	)
+
+// -- 1e / 1r --
+
+// tinvert1es: invert xri in place, then store (-imag(xri), real(xri)) into xir; braced so both steps form a single statement
+#define bli_tinvert1es( chx, chc, xir, xri ) { \
+	bli_tinvertims \
+	( \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(xri), \
+	  PASTEMAC(chx,imag)(xri), \
+	  PASTEMAC(chc,prec) \
+	); \
+	bli_tcopyims \
+	( \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(PASTEMAC(chx,prec),neg)( \
+	    PASTEMAC(chx,imag)(xri) \
+	  ), \
+	  PASTEMAC(chx,real)(xri), \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(xir), \
+	  PASTEMAC(chx,imag)(xir) \
+	); }
+
+// tinvert1rs: in-place inversion of separately passed xr and xi (same expansion as tinvertris)
+#define bli_tinvert1rs( chx, chc, xr, xi ) \
+	bli_tinvertims \
+	( \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  xr, \
+	  xi, \
+	  PASTEMAC(chc,prec) \
+	)
+
+// -- Higher-level static functions --------------------------------------------
+
+// -- Notes --------------------------------------------------------------------
+
+#endif
+
+
diff --git a/frame/include/level0/bli_tinvscals.h b/frame/include/level0/bli_tinvscals.h
new file mode 100644
index 0000000000..cdf0f31968
--- /dev/null
+++ b/frame/include/level0/bli_tinvscals.h
@@ -0,0 +1,264 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_TINVSCALS_H +#define BLIS_TINVSCALS_H + +// -- Implementation macro ----------------------------------------------------- + +#define bli_tinvscalims( \ + \ + da, pa, ar, ai, \ + dx, px, xr, xi, \ + chc \ + ) \ +{ \ + PASTEMAC(da,dx,invscalims) \ + ( \ + da, pa, ar, ai, \ + dx, px, xr, xi, \ + chc \ + ); \ +} + +// -- real-real domain implementation -- +// -- real-complex domain implementation -- + +// (xr) = (xr) / (ar); +// (xi) = (xi) / (ar); + +#define bli_rrinvscalims bli_rcinvscalims + +#define bli_rcinvscalims( \ + \ + da, pa, ar, ai, \ + dx, px, xr, xi, \ + chc \ + ) \ +{ \ + PASTEMAC(dx,assigns) \ + ( \ + PASTEMAC(chc,px,tcast)( \ + PASTEMAC(chc,div)( \ + PASTEMAC(px,chc,tcast)(xr), \ + PASTEMAC(pa,chc,tcast)(ar) \ + ) \ + ),\ + PASTEMAC(chc,px,tcast)( \ + PASTEMAC(chc,div)( \ + PASTEMAC(px,chc,tcast)(xi), \ + PASTEMAC(pa,chc,tcast)(ar) \ + ) \ + ),\ + xr, \ + xi \ + ); \ +} + +// -- complex-real domain implementation -- +// -- complex-complex domain implementation -- + +// sr = maxabs( ar, ai ); +// asr = ar / sr; +// asi = ai / sr; +// xrt = xr; +// tempr = ar * asr + ai * asi +// xr = ( asr * xrt + asi * xi ) / tempr; +// xi = ( asr * xi - asi * xrt ) / tempr; + +#define bli_crinvscalims bli_ccinvscalims + +#define bli_ccinvscalims( \ + \ + da, pa, ar, ai, \ + dx, px, xr, xi, \ + chc \ + ) \ +{ \ + PASTEMAC(ro,declinits) \ + ( \ + chc, \ + PASTEMAC(chc,maxabs)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + PASTEMAC(pa,chc,tcast)(ai) \ + ), \ + sr \ + ) \ + PASTEMAC(c,declinits) \ + ( \ + chc, \ + PASTEMAC(chc,div)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + sr \ + ), \ + PASTEMAC(chc,div)( \ + PASTEMAC(pa,chc,tcast)(ai), \ + sr \ + ), \ + asr, \ + asi \ + ) \ + PASTEMAC(ro,declinits) \ + ( \ + chc, \ + PASTEMAC(px,chc,tcast)(xr), \ + xrt \ + ) \ + PASTEMAC(ro,declinits) \ + ( \ + chc, \ + PASTEMAC(chc,add)( \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + asr \ + ), \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ai), \ + asi \ + ) \ + 
), \ + tempr \ + ) \ + PASTEMAC(dx,assigns) \ + ( \ + PASTEMAC(chc,px,tcast)( \ + PASTEMAC(chc,div)( \ + PASTEMAC(chc,add)( \ + PASTEMAC(chc,mul)( \ + asr, \ + xrt \ + ), \ + PASTEMAC(chc,mul)( \ + asi, \ + PASTEMAC(px,chc,tcast)(xi) \ + ) \ + ), \ + tempr \ + ) \ + ),\ + PASTEMAC(chc,px,tcast)( \ + PASTEMAC(chc,div)( \ + PASTEMAC(chc,sub)( \ + PASTEMAC(chc,mul)( \ + asr, \ + PASTEMAC(px,chc,tcast)(xi) \ + ), \ + PASTEMAC(chc,mul)( \ + asi, \ + xrt \ + ) \ + ), \ + tempr \ + ) \ + ),\ + xr, \ + xi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tinvscals: x := x / a in place, computed in the precision of chc +#define bli_tinvscals( cha, chx, chc, a, x ) \ + bli_tinvscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + PASTEMAC(cha,real)(a), \ + PASTEMAC(cha,imag)(a), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chc,prec) \ + ) + +// tinvscaljs: x := x / conj(a); the imaginary part of a is negated before it is passed on +#define bli_tinvscaljs( cha, chx, chc, a, x ) \ + bli_tinvscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + PASTEMAC(cha,real)(a), \ + PASTEMAC(PASTEMAC(cha,prec),neg)( \ + PASTEMAC(cha,imag)(a) \ + ), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chc,prec) \ + ) + +// -- Exposed real/imaginary -- + +// tinvscalris: same as tinvscals, with the real/imaginary parts of a and x passed separately +#define bli_tinvscalris( cha, chx, chc, ar, ai, xr, xi ) \ + bli_tinvscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + ar, \ + ai, \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chc,prec) \ + ) + +// tinvscaljris: same as tinvscaljs, with the real/imaginary parts of a and x passed separately +#define bli_tinvscaljris( cha, chx, chc, ar, ai, xr, xi ) \ + bli_tinvscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + ar, \ + PASTEMAC(PASTEMAC(cha,prec),neg)( \ + ai ), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chc,prec) \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes 
-------------------------------------------------------------------- + +#endif + + diff --git a/frame/include/level0/old/ri3/bli_scal2ri3s.h b/frame/include/level0/bli_tneg2s.h similarity index 53% rename from frame/include/level0/old/ri3/bli_scal2ri3s.h rename to frame/include/level0/bli_tneg2s.h index 72f3911cc8..8c118f2cfd 100644 --- a/frame/include/level0/old/ri3/bli_scal2ri3s.h +++ b/frame/include/level0/bli_tneg2s.h @@ -32,48 +32,70 @@ */ -#ifndef BLIS_SCAL2RI3S_H -#define BLIS_SCAL2RI3S_H +#ifndef BLIS_TNEG2S_H +#define BLIS_TNEG2S_H -// scal2ri3s +// -- Implementation macro ----------------------------------------------------- -#define bli_sscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr); \ -} +// (yr) = -(xr); +// (yi) = -(xi); -#define bli_dscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ +#define bli_tneg2ims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ) \ { \ - (yr) = (ar) * (xr); \ + PASTEMAC(dy,assigns) \ + ( \ + PASTEMAC(py,neg)( \ + PASTEMAC(px,py,tcast)(xr) \ + ), \ + PASTEMAC(py,neg)( \ + PASTEMAC(px,py,tcast)(xi) \ + ), \ + yr, \ + yi \ + ); \ } -#define bli_cscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr) - (ai) * (xi); \ - (yi) = (ai) * (xr) + (ar) * (xi); \ - (yri) = (yr) + (yi); \ -} +// -- API macros --------------------------------------------------------------- -#define bli_zscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr) - (ai) * (xi); \ - (yi) = (ai) * (xr) + (ar) * (xi); \ - (yri) = (yr) + (yi); \ -} +// -- Consolidated -- -#define bli_scscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr); \ - (yi) = (ar) * (xi); \ - (yri) = (yr) + (yi); \ -} +// tneg2s: y := -x, with x cast from the precision of chx to that of chy before negation +#define bli_tneg2s( chx, chy, x, y ) \ + bli_tneg2ims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) -#define bli_dzscal2ri3s( ar, ai, xr, xi, yr, 
yi, yri ) \ -{ \ - (yr) = (ar) * (xr); \ - (yi) = (ar) * (xi); \ - (yri) = (yr) + (yi); \ -} +// -- Exposed real/imaginary -- + +// tneg2ris: same as tneg2s, with the real/imaginary parts of x and y passed separately +#define bli_tneg2ris( chx, chy, xr, xi, yr, yi ) \ + bli_tneg2ims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- #endif diff --git a/frame/include/level0/1r/bli_scal2j1rs.h b/frame/include/level0/bli_trandnp2s.h similarity index 60% rename from frame/include/level0/1r/bli_scal2j1rs.h rename to frame/include/level0/bli_trandnp2s.h index 9f75b55e6d..af321d3d0c 100644 --- a/frame/include/level0/1r/bli_scal2j1rs.h +++ b/frame/include/level0/bli_trandnp2s.h @@ -32,34 +32,57 @@ */ -#ifndef BLIS_SCAL2J1RS_H -#define BLIS_SCAL2J1RS_H +#ifndef BLIS_TRANDNP2S_H +#define BLIS_TRANDNP2S_H -// scal2j1rs +// -- Implementation macro ----------------------------------------------------- -#define bli_cscscal2j1rs( a, x, yr, yi ) \ -{ \ - bli_cscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), yr, yi ); \ -} +// (xr) = randnp2(); +// (xi) = randnp2(); -#define bli_cccscal2j1rs( a, x, yr, yi ) \ +#define bli_trandnp2ims( \ + \ + dx, px, xr, xi \ + ) \ { \ - bli_cscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), yr, yi ); \ + PASTEMAC(dx,assigns) \ + ( \ + PASTEMAC(px,randnp2),\ + PASTEMAC(px,randnp2),\ + xr, \ + xi \ + ); \ } -#define bli_zdzscal2j1rs( a, x, yr, yi ) \ -{ \ - bli_zscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), yr, yi ); \ -} +// -- API macros --------------------------------------------------------------- -#define bli_zzzscal2j1rs( a, x, yr, yi ) \ -{ \ - bli_zscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), yr, yi ); \ -} +// -- Consolidated -- + +// trandnp2s: assign the precision-specific randnp2 macro to the real and imaginary parts of x (one expansion per part) +#define bli_trandnp2s( chx, x ) \ + 
bli_trandnp2ims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x) \ + ) + +// -- Exposed real/imaginary -- + +// trandnp2ris: same as trandnp2s, with the real part xr and imaginary part xi passed separately +#define bli_trandnp2ris( chx, xr, xi ) \ + bli_trandnp2ims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi \ + ) +// -- Higher-level static functions -------------------------------------------- -#define bli_cscal2j1rs( a, x, yr, yi ) bli_cccscal2j1rs( a, x, yr, yi ) -#define bli_zscal2j1rs( a, x, yr, yi ) bli_zzzscal2j1rs( a, x, yr, yi ) +// -- Notes -------------------------------------------------------------------- #endif diff --git a/frame/include/level0/old/ri3/bli_scal2jri3s.h b/frame/include/level0/bli_trands.h similarity index 60% rename from frame/include/level0/old/ri3/bli_scal2jri3s.h rename to frame/include/level0/bli_trands.h index 08be57c1d7..8b03d0bfd6 100644 --- a/frame/include/level0/old/ri3/bli_scal2jri3s.h +++ b/frame/include/level0/bli_trands.h @@ -32,48 +32,57 @@ */ -#ifndef BLIS_SCAL2JRI3S_H -#define BLIS_SCAL2JRI3S_H +#ifndef BLIS_TRANDS_H +#define BLIS_TRANDS_H -// scal2jri3s +// -- Implementation macro ----------------------------------------------------- -#define bli_sscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr); \ -} +// (xr) = rand(); +// (xi) = rand(); -#define bli_dscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ +#define bli_trandims( \ + \ + dx, px, xr, xi \ + ) \ { \ - (yr) = (ar) * (xr); \ + PASTEMAC(dx,assigns) \ + ( \ + PASTEMAC(px,rand),\ + PASTEMAC(px,rand),\ + xr, \ + xi \ + ); \ } -#define bli_cscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr) + (ai) * (xi); \ - (yi) = (ai) * (xr) - (ar) * (xi); \ - (yri) = (yr) + (yi); \ -} +// -- API macros --------------------------------------------------------------- -#define bli_zscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr) + (ai) * (xi); \ - (yi) = (ai) * (xr) - (ar) * (xi); \ - (yri) = (yr) + (yi); \ -} +// -- 
Consolidated -- -#define bli_scscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr); \ - (yi) = (ar) * -(xi); \ - (yri) = (yr) + (yi); \ -} +// trands: assign the precision-specific rand macro to the real and imaginary parts of x (one expansion per part) +#define bli_trands( chx, x ) \ + bli_trandims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x) \ + ) -#define bli_dzscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ -{ \ - (yr) = (ar) * (xr); \ - (yi) = (ar) * -(xi); \ - (yri) = (yr) + (yi); \ -} +// -- Exposed real/imaginary -- + +// trandris: same as trands, with the real part xr and imaginary part xi passed separately +#define bli_trandris( chx, xr, xi ) \ + bli_trandims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- #endif diff --git a/frame/include/level0/bli_tscal2s.h b/frame/include/level0/bli_tscal2s.h new file mode 100644 index 0000000000..586ed66fb0 --- /dev/null +++ b/frame/include/level0/bli_tscal2s.h @@ -0,0 +1,565 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef BLIS_TSCAL2S_H
+#define BLIS_TSCAL2S_H
+
+// -- Implementation macro -----------------------------------------------------
+
+// (yr) := (ar) * (xr) - (ai) * (xi);
+// (yi) := (ai) * (xr) + (ar) * (xi);
+
+#define bli_tscal2ims( \
+          \
+          da, pa, ar, ai, \
+          dx, px, xr, xi, \
+          dy, py, yr, yi, \
+          chc \
+        ) \
+{ /* each product is formed in precision chc and filtered through the (da,dx) term macros; results are cast to py */ \
+	PASTEMAC(dy,assigns) \
+	( \
+	  PASTEMAC(chc,py,tcast)( \
+	    PASTEMAC(chc,sub)( \
+	      PASTEMAC(da,dx,termrr)( \
+	        chc, \
+	        PASTEMAC(chc,mul)( \
+	          PASTEMAC(pa,chc,tcast)(ar), \
+	          PASTEMAC(px,chc,tcast)(xr) \
+	        ) \
+	      ), \
+	      PASTEMAC(da,dx,termii)( \
+	        chc, \
+	        PASTEMAC(chc,mul)( \
+	          PASTEMAC(pa,chc,tcast)(ai), \
+	          PASTEMAC(px,chc,tcast)(xi) \
+	        ) \
+	      ) \
+	    ) \
+	  ), \
+	  PASTEMAC(chc,py,tcast)( \
+	    PASTEMAC(chc,add)( \
+	      PASTEMAC(da,dx,termir)( \
+	        chc, \
+	        PASTEMAC(chc,mul)( \
+	          PASTEMAC(pa,chc,tcast)(ai), \
+	          PASTEMAC(px,chc,tcast)(xr) \
+	        ) \
+	      ), \
+	      PASTEMAC(da,dx,termri)( \
+	        chc, \
+	        PASTEMAC(chc,mul)( \
+	          PASTEMAC(pa,chc,tcast)(ar), \
+	          PASTEMAC(px,chc,tcast)(xi) \
+	        ) \
+	      ) \
+	    ) \
+	  ), \
+	  yr, \
+	  yi \
+	); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// tscal2s: y := a * x, computed in the precision of chc
+#define bli_tscal2s( cha, chx, chy, chc, a, x, y ) \
+	bli_tscal2ims \
+	( \
+	  PASTEMAC(cha,dom), \
+	  PASTEMAC(cha,prec), \
+	  PASTEMAC(cha,real)(a), \
+	  PASTEMAC(cha,imag)(a), \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(x), \
+	  PASTEMAC(chx,imag)(x), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  PASTEMAC(chy,real)(y), \
+	  PASTEMAC(chy,imag)(y), \
+	  PASTEMAC(chc,prec) \
+	)
+
+// tscal2js: y := a * conj(x); the imaginary part of x is negated before it is passed on
+#define bli_tscal2js( cha, chx, chy, chc, a, x, y ) \
+	bli_tscal2ims \
+	( \
+	  PASTEMAC(cha,dom), \
+	  PASTEMAC(cha,prec), \
+	  PASTEMAC(cha,real)(a), \
+	  PASTEMAC(cha,imag)(a), \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(x), \
+	  PASTEMAC(PASTEMAC(chx,prec),neg)( \
+	    PASTEMAC(chx,imag)(x) \
+	  ), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  PASTEMAC(chy,real)(y), \
+	  PASTEMAC(chy,imag)(y), \
+	  PASTEMAC(chc,prec) \
+	)
+
+// -- Exposed real/imaginary --
+
+// tscal2ris: same as tscal2s, with the real/imaginary parts of a, x and y passed separately
+#define bli_tscal2ris( cha, chx, chy, chc, ar, ai, xr, xi, yr, yi ) \
+	bli_tscal2ims \
+	( \
+	  PASTEMAC(cha,dom), \
+	  PASTEMAC(cha,prec), \
+	  ar, \
+	  ai, \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  xr, \
+	  xi, \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  yr, \
+	  yi, \
+	  PASTEMAC(chc,prec) \
+	)
+
+// tscal2jris: same as tscal2js, with the real/imaginary parts of a, x and y passed separately
+#define bli_tscal2jris( cha, chx, chy, chc, ar, ai, xr, xi, yr, yi ) \
+	bli_tscal2ims \
+	( \
+	  PASTEMAC(cha,dom), \
+	  PASTEMAC(cha,prec), \
+	  ar, \
+	  ai, \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  xr, \
+	  PASTEMAC(PASTEMAC(chx,prec),neg)( \
+	    xi ), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  yr, \
+	  yi, \
+	  PASTEMAC(chc,prec) \
+	)
+
+// -- 1e / 1r --
+
+// tscal21es: yri := a * x, then yir := (-imag(yri), real(yri)); braced so both steps form a single statement
+#define bli_tscal21es( cha, chx, chy, chc, a, x, yri, yir ) { \
+	bli_tscal2ims \
+	( \
+	  PASTEMAC(cha,dom), \
+	  PASTEMAC(cha,prec), \
+	  PASTEMAC(cha,real)(a), \
+	  PASTEMAC(cha,imag)(a), \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(x), \
+	  PASTEMAC(chx,imag)(x), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  PASTEMAC(chy,real)(yri), \
+	  PASTEMAC(chy,imag)(yri), \
+	  PASTEMAC(chc,prec) \
+	); \
+	bli_tcopyims \
+	( \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  PASTEMAC(PASTEMAC(chy,prec),neg)( \
+	    PASTEMAC(chy,imag)(yri) \
+	  ), \
+	  PASTEMAC(chy,real)(yri), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  PASTEMAC(chy,real)(yir), \
+	  PASTEMAC(chy,imag)(yir) \
+	); }
+
+// tscal2j1es: yri := a * conj(x), then yir := (-imag(yri), real(yri)); braced so both steps form a single statement
+#define bli_tscal2j1es( cha, chx, chy, chc, a, x, yri, yir ) { \
+	bli_tscal2ims \
+	( \
+	  PASTEMAC(cha,dom), \
+	  PASTEMAC(cha,prec), \
+	  PASTEMAC(cha,real)(a), \
+	  PASTEMAC(cha,imag)(a), \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(x), \
+	  PASTEMAC(PASTEMAC(chx,prec),neg)( \
+	    PASTEMAC(chx,imag)(x) \
+	  ), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  PASTEMAC(chy,real)(yri), \
+	  PASTEMAC(chy,imag)(yri), \
+	  PASTEMAC(chc,prec) \
+	); \
+	bli_tcopyims \
+	( \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  PASTEMAC(PASTEMAC(chy,prec),neg)( \
+	    PASTEMAC(chy,imag)(yri) \
+	  ), \
+	  PASTEMAC(chy,real)(yri), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  PASTEMAC(chy,real)(yir), \
+	  PASTEMAC(chy,imag)(yir) \
+	); }
+
+// tscal21rs: y := a * x, with the output passed as separate real (yr) and imaginary (yi) parts
+#define bli_tscal21rs( cha, chx, chy, chc, a, x, yr, yi ) \
+	bli_tscal2ims \
+	( \
+	  PASTEMAC(cha,dom), \
+	  PASTEMAC(cha,prec), \
+	  PASTEMAC(cha,real)(a), \
+	  PASTEMAC(cha,imag)(a), \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(x), \
+	  PASTEMAC(chx,imag)(x), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  yr, \
+	  yi, \
+	  PASTEMAC(chc,prec) \
+	)
+
+// tscal2j1rs: y := a * conj(x), with the output passed as separate real (yr) and imaginary (yi) parts
+#define bli_tscal2j1rs( cha, chx, chy, chc, a, x, yr, yi ) \
+	bli_tscal2ims \
+	( \
+	  PASTEMAC(cha,dom), \
+	  PASTEMAC(cha,prec), \
+	  PASTEMAC(cha,real)(a), \
+	  PASTEMAC(cha,imag)(a), \
+	  PASTEMAC(chx,dom), \
+	  PASTEMAC(chx,prec), \
+	  PASTEMAC(chx,real)(x), \
+	  PASTEMAC(PASTEMAC(chx,prec),neg)( \
+	    PASTEMAC(chx,imag)(x) \
+	  ), \
+	  PASTEMAC(chy,dom), \
+	  PASTEMAC(chy,prec), \
+	  yr, \
+	  yi, \
+	  PASTEMAC(chc,prec) \
+	)
+
+// -- Higher-level static functions --------------------------------------------
+
+// 
-- mxn --
+
+// tscal2bbs_mxn: y := alpha * conjx(x) over an m x n block; every result is duplicated into d = incy consecutive slots of y
+
+#define bli_tscal2bbs_mxn_r( \
+          cha,chx,chy,chc, \
+          ctypea, ctypea_r, \
+          ctypex, ctypex_r, \
+          ctypey, ctypey_r, \
+          conjx, \
+          m, \
+          n, \
+          alpha, \
+          x, incx, ldx, \
+          y, incy, ldy \
+        ) \
+{ \
+	/* Assume that the duplication factor is the row stride of y. */ \
+	const dim_t d = incy; \
+	const dim_t ds_y = 1; /* duplicates are written to consecutive elements */ \
+\
+	for ( dim_t j = 0; j < (n); ++j ) \
+	{ \
+		ctypex* restrict xj = (ctypex*)(x) + j*(ldx); \
+		ctypey* restrict yj = (ctypey*)(y) + j*(ldy); \
+\
+		for ( dim_t i = 0; i < (m); ++i ) \
+		{ \
+			ctypex* restrict xij = xj + i*(incx); \
+			ctypey* restrict yij = yj + i*(incy); \
+\
+			/* honor conjx: it changes the real part of the product when both alpha and x are complex */ if ( bli_is_conj( conjx ) ) { bli_tscal2js( cha,chx,chy,chc, *(const ctypea* restrict)(alpha), *xij, *yij ); } \
+			else { bli_tscal2s( cha,chx,chy,chc, *(const ctypea* restrict)(alpha), *xij, *yij ); } \
+			for ( dim_t p = 1; p < d; ++p ) \
+			{ \
+				ctypey* restrict yijd = yij + p*ds_y; \
+\
+				bli_tcopys( chy,chy, *yij, *yijd ); \
+			} \
+		} \
+	} \
+}
+
+#define bli_tscal2bbs_mxn_c( \
+          cha,chx,chy,chc, \
+          ctypea, ctypea_r, \
+          ctypex, ctypex_r, \
+          ctypey, ctypey_r, \
+          conjx, \
+          m, \
+          n, \
+          alpha, \
+          x, incx, ldx, \
+          y, incy, ldy \
+        ) \
+{ \
+	/* Assume that the duplication factor is the row stride of y. */ \
+	const dim_t d = incy; \
+	const dim_t ds_y = 1; \
+\
+	const inc_t incx2 = 2 * (incx); /* strides are doubled because x and y are walked through real-typed pointers */ \
+	const inc_t ldx2 = 2 * (ldx); \
+\
+	const inc_t incy2 = 2 * (incy); \
+	const inc_t ldy2 = 2 * (ldy); \
+\
+	ctypea_r* restrict alpha_r = ( ctypea_r* )(alpha); \
+	ctypea_r* restrict alpha_i = ( ctypea_r* )(alpha) + 1; \
+	ctypex_r* restrict chi_r = ( ctypex_r* )(x); \
+	ctypex_r* restrict chi_i = ( ctypex_r* )(x) + 1; \
+	ctypey_r* restrict psi_r = ( ctypey_r* )(y); \
+	ctypey_r* restrict psi_i = ( ctypey_r* )(y) + 1*d; /* imaginary parts start d real elements after the real parts */ \
+\
+	if ( bli_is_conj( conjx ) ) \
+	{ \
+		for ( dim_t j = 0; j < (n); ++j ) \
+		{ \
+			ctypex_r* restrict chij_r = chi_r + j*ldx2; \
+			ctypex_r* restrict chij_i = chi_i + j*ldx2; \
+			ctypey_r* restrict psij_r = psi_r + j*ldy2; \
+			ctypey_r* restrict psij_i = psi_i + j*ldy2; \
+\
+			for ( dim_t i = 0; i < (m); ++i ) \
+			{ \
+				ctypex_r* restrict chiij_r = chij_r + i*incx2; \
+				ctypex_r* restrict chiij_i = chij_i + i*incx2; \
+				ctypey_r* restrict psiij_r = psij_r + i*incy2; \
+				ctypey_r* restrict psiij_i = psij_i + i*incy2; \
+\
+				bli_tscal2jris( cha,chx,chy,chc, \
+				                *alpha_r, *alpha_i, \
+				                *chiij_r, *chiij_i, \
+				                *psiij_r, *psiij_i ); \
+\
+				for ( dim_t p = 1; p < d; ++p ) \
+				{ \
+					ctypey_r* restrict psiijd_r = psiij_r + p*ds_y; \
+					ctypey_r* restrict psiijd_i = psiij_i + p*ds_y; \
+\
+					bli_tcopyris( chy,chy, *psiij_r, *psiij_i, \
+					              *psiijd_r, *psiijd_i ); \
+				} \
+			} \
+		} \
+	} \
+	else /* if ( bli_is_noconj( conjx ) ) */ \
+	{ \
+		for ( dim_t j = 0; j < (n); ++j ) \
+		{ \
+			ctypex_r* restrict chij_r = chi_r + j*ldx2; \
+			ctypex_r* restrict chij_i = chi_i + j*ldx2; \
+			ctypey_r* restrict psij_r = psi_r + j*ldy2; \
+			ctypey_r* restrict psij_i = psi_i + j*ldy2; \
+\
+			for ( dim_t i = 0; i < (m); ++i ) \
+			{ \
+				ctypex_r* restrict chiij_r = chij_r + i*incx2; \
+				ctypex_r* restrict chiij_i = chij_i + i*incx2; \
+				ctypey_r* restrict psiij_r = psij_r + i*incy2; \
+				ctypey_r* restrict psiij_i = psij_i + i*incy2; \
+\
+				bli_tscal2ris( cha,chx,chy,chc, \
+				               *alpha_r, *alpha_i, \
+				               *chiij_r, *chiij_i, \
+				               *psiij_r, *psiij_i ); \
+\
+				for ( dim_t p = 1; p < d; ++p ) \
+				{ \
+					ctypey_r* restrict psiijd_r = psiij_r + p*ds_y; \
+					ctypey_r* restrict psiijd_i = psiij_i + p*ds_y; \
+\
+					bli_tcopyris( chy,chy, *psiij_r, *psiij_i, \
+					              *psiijd_r, *psiijd_i ); \
+				} \
+			} \
+		} \
+	} \
+}
+
+#define bli_tscal2bbs_mxn( \
+          cha,chx,chy,chc, \
+          conjx, \
+          m, \
+          n, \
+          alpha, \
+          x, incx, ldx, \
+          y, incy, ldy \
+        ) \
+PASTECH(bli_tscal2bbs_mxn_,PASTEMAC(chy,dom)) /* picks the _r or _c variant from the domain of y */ \
+( \
+  cha,chx,chy,chc, \
+  PASTEMAC(cha,ctype),PASTEMAC(cha,ctyper), \
+  PASTEMAC(chx,ctype),PASTEMAC(chx,ctyper), \
+  PASTEMAC(chy,ctype),PASTEMAC(chy,ctyper), \
+  conjx, \
+  m, \
+  n, \
+  alpha, \
+  x, incx, ldx, \
+  y, incy, ldy \
+)
+
+#define bli_tscal2s_mxn( cha, chx, chy, chc, conjx, m, n, alpha, x, rs_x, cs_x, y, rs_y, cs_y ) \
+{ /* y(ii,jj) := alpha * conjx( x(ii,jj) ) for an m x n block; alpha, x and y are pointers */ \
+	if ( bli_is_conj( conjx ) ) \
+	{ \
+		for ( dim_t jj = 0; jj < (n); ++jj ) \
+		for ( dim_t ii = 0; ii < (m); ++ii ) \
+		{ \
+			PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+			PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+			bli_tscal2js( cha,chx,chy,chc, *(alpha), *xij, *yij ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( dim_t jj = 0; jj < (n); ++jj ) \
+		for ( dim_t ii = 0; ii < (m); ++ii ) \
+		{ \
+			PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+			PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+			bli_tscal2s( cha,chx,chy,chc, *(alpha), *xij, *yij ); \
+		} \
+	} \
+}
+
+#define bli_tscal2ris_mxn( cha, chx, chy, chc, conjx, m, n, alpha, x, rs_x, cs_x, y, rs_y, cs_y, is_y ) \
+{ /* as tscal2s_mxn, but y keeps its imaginary parts is_y real elements after its real parts */ \
+	PASTEMAC(cha,ctyper)* restrict alpha_r = ( PASTEMAC(cha,ctyper)* )(alpha); (void)alpha_r; \
+	PASTEMAC(cha,ctyper)* restrict alpha_i = ( PASTEMAC(cha,ctyper)* )(alpha) + 1; (void)alpha_i; \
+	PASTEMAC(chx,ctyper)* restrict x_r = ( PASTEMAC(chx,ctyper)* )(x); \
+	PASTEMAC(chx,ctyper)* restrict x_i = ( PASTEMAC(chx,ctyper)* )(x) + 1; \
+	PASTEMAC(chy,ctyper)* restrict y_r = ( PASTEMAC(chy,ctyper)* )(y); \
+	PASTEMAC(chy,ctyper)* restrict y_i = ( PASTEMAC(chy,ctyper)* )(y) + (is_y); \
+	const inc_t incx2 = 2*(rs_x); /* x strides are doubled for the real-typed pointers; rs_y and cs_y are used as given */ \
+	const inc_t ldx2 = 2*(cs_x); \
+\
+	if ( bli_is_conj( conjx ) ) \
+	{ \
+		for ( dim_t jj = 0; jj < (n); ++jj ) \
+		for ( dim_t ii = 0; ii < (m); ++ii ) \
+		{ \
+			PASTEMAC(chx,ctyper)* restrict chi11_r = x_r + ii*incx2 + jj*ldx2; (void)chi11_r; \
+			PASTEMAC(chx,ctyper)* restrict chi11_i = x_i + ii*incx2 + jj*ldx2; (void)chi11_i; \
+			PASTEMAC(chy,ctyper)* restrict psi11_r = y_r + ii*(rs_y) + jj*(cs_y); (void)psi11_r; \
+			PASTEMAC(chy,ctyper)* restrict psi11_i = y_i + ii*(rs_y) + jj*(cs_y); (void)psi11_i; \
+\
+			bli_tscal2jris \
+			( \
+			  cha,chx,chy,chc, \
+			  *alpha_r, *alpha_i, \
+			  *chi11_r, *chi11_i, \
+			  *psi11_r, *psi11_i \
+			); \
+		} \
+	} \
+	else \
+	{ \
+		for ( dim_t jj = 0; jj < (n); ++jj ) \
+		for ( dim_t ii = 0; ii < (m); ++ii ) \
+		{ \
+			PASTEMAC(chx,ctyper)* restrict chi11_r = x_r + ii*incx2 + jj*ldx2; (void)chi11_r; \
+			PASTEMAC(chx,ctyper)* restrict chi11_i = x_i + ii*incx2 + jj*ldx2; (void)chi11_i; \
+			PASTEMAC(chy,ctyper)* restrict psi11_r = y_r + ii*(rs_y) + jj*(cs_y); (void)psi11_r; \
+			PASTEMAC(chy,ctyper)* restrict psi11_i = y_i + ii*(rs_y) + jj*(cs_y); (void)psi11_i; \
+\
+			bli_tscal2ris \
+			( \
+			  cha,chx,chy,chc, \
+			  *alpha_r, *alpha_i, \
+			  *chi11_r, *chi11_i, \
+			  *psi11_r, *psi11_i \
+			); \
+		} \
+	} \
+}
+
+// -- Notes --------------------------------------------------------------------
+
+// -- Domain cases (the three letters appear to be the domains of y, a, x; "xx" marks an unassigned imaginary part) --
+
+// r r r
+// (yr) := (ar) * (xr) - 0 * 0 ;
+// (yi) xx 0 * (xr) + (ar) * 0 ;
+
+// r r c
+// (yr) := (ar) * (xr) - 0 * (xi);
+// (yi) xx 0 * (xr) + (ar) * (xi);
+
+// r c r
+// (yr) := (ar) * (xr) - (ai) * 0 ;
+// (yi) xx (ai) * (xr) + (ar) * 0 ;
+
+// r c c
+// (yr) := (ar) * (xr) - (ai) * (xi);
+// (yi) xx (ai) * (xr) + (ar) * (xi);
+
+// c r r
+// (yr) := (ar) * (xr) - 0 * 0 ;
+// (yi) := 0 * (xr) + (ar) * 0 ;
+
+// c r c
+// (yr) := (ar) * (xr) - 0 * (xi);
+// (yi) := 0 * (xr) + (ar) * 
(xi); + +// c c r +// (yr) := (ar) * (xr) - (ai) * 0 ; +// (yi) := (ai) * (xr) + (ar) * 0 ; + +// c c c +// (yr) := (ar) * (xr) - (ai) * (xi); +// (yi) := (ai) * (xr) + (ar) * (xi); + +#endif + diff --git a/frame/include/level0/bli_tscalcjs.h b/frame/include/level0/bli_tscalcjs.h new file mode 100644 index 0000000000..b526aa0c61 --- /dev/null +++ b/frame/include/level0/bli_tscalcjs.h @@ -0,0 +1,129 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_TSCALCJS_H +#define BLIS_TSCALCJS_H + +// -- Implementation macro ----------------------------------------------------- + +// (tr) := (ar) * (xr) - ( is_conj( conj ) ? -(ai) : (ai) ) * (xi); +// (ti) := ( is_conj( conj ) ? -(ai) : (ai) ) * (xr) + (ar) * (xi); +// (xr) := (tr); +// (xi) := (ti); + +#define bli_tscalcjims( \ + \ + conj, \ + da, pa, ar, ai, \ + dx, px, xr, xi, \ + chc \ + ) \ +{ \ + PASTEMAC(c,declinits) \ + ( \ + chc, \ + PASTEMAC(chc,sub)( \ + PASTEMAC(da,dx,termrr)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + PASTEMAC(px,chc,tcast)(xr) \ + ) \ + ), \ + PASTEMAC(da,dx,termii)( \ + chc, \ + PASTEMAC(chc,mul)( \ + ( bli_is_conj( conj ) ? PASTEMAC(pa,neg)( \ + PASTEMAC(pa,chc,tcast)(ai) \ + ) \ + : PASTEMAC(pa,chc,tcast)(ai) \ + ), \ + PASTEMAC(px,chc,tcast)(xi) \ + ) \ + ) \ + ),\ + PASTEMAC(chc,add)( \ + PASTEMAC(da,dx,termir)( \ + chc, \ + PASTEMAC(chc,mul)( \ + ( bli_is_conj( conj ) ? 
PASTEMAC(pa,neg)( \ + PASTEMAC(pa,chc,tcast)(ai) \ + ) \ + : PASTEMAC(pa,chc,tcast)(ai) \ + ), \ + PASTEMAC(px,chc,tcast)(xr) \ + ) \ + ), \ + PASTEMAC(da,dx,termri)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + PASTEMAC(px,chc,tcast)(xi) \ + ) \ + ) \ + ), \ + tr, \ + ti \ + ) \ + PASTEMAC(dx,assigns) \ + ( \ + tr, \ + ti, \ + xr, \ + xi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tscals +#define bli_tscalcjs( cha, chx, chc, conj, a, x ) \ + bli_tscalcjims \ + ( \ + conj, \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + PASTEMAC(cha,real)(a), \ + PASTEMAC(cha,imag)(a), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chc,prec) \ + ) + +#endif + diff --git a/frame/include/level0/bli_tscals.h b/frame/include/level0/bli_tscals.h new file mode 100644 index 0000000000..709860c986 --- /dev/null +++ b/frame/include/level0/bli_tscals.h @@ -0,0 +1,272 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_TSCALS_H +#define BLIS_TSCALS_H + +// -- Implementation macro ----------------------------------------------------- + +// (tr) := (ar) * (xr) - (ai) * (xi); +// (ti) := (ai) * (xr) + (ar) * (xi); +// (xr) := (tr); +// (xi) := (ti); + +#define bli_tscalims( \ + \ + da, pa, ar, ai, \ + dx, px, xr, xi, \ + chc \ + ) \ +{ \ + PASTEMAC(c,declinits) \ + ( \ + chc, \ + PASTEMAC(chc,sub)( \ + PASTEMAC(da,dx,termrr)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + PASTEMAC(px,chc,tcast)(xr) \ + ) \ + ), \ + PASTEMAC(da,dx,termii)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ai), \ + PASTEMAC(px,chc,tcast)(xi) \ + ) \ + ) \ + ),\ + PASTEMAC(chc,add)( \ + PASTEMAC(da,dx,termir)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ai), \ + PASTEMAC(px,chc,tcast)(xr) \ + ) \ + ), \ + PASTEMAC(da,dx,termri)( \ + chc, \ + PASTEMAC(chc,mul)( \ + PASTEMAC(pa,chc,tcast)(ar), \ + PASTEMAC(px,chc,tcast)(xi) \ + ) \ + ) \ + ), \ + tr, \ + ti \ + ) \ + PASTEMAC(dx,assigns) \ + ( \ + tr, \ + ti, \ + xr, \ + xi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tscals +#define 
bli_tscals( cha, chx, chc, a, x ) \ + bli_tscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + PASTEMAC(cha,real)(a), \ + PASTEMAC(cha,imag)(a), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chc,prec) \ + ) + +// tscaljs +#define bli_tscaljs( cha, chx, chc, a, x ) \ + bli_tscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + PASTEMAC(cha,real)(a), \ + PASTEMAC(PASTEMAC(cha,prec),neg)( \ + PASTEMAC(cha,imag)(a) \ + ), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chc,prec) \ + ) + +// -- Exposed real/imaginary -- + +// tscalris +#define bli_tscalris( cha, chx, chc, ar, ai, xr, xi ) \ + bli_tscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + ar, \ + ai, \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chc,prec) \ + ) + +// tscaljris +#define bli_tscaljris( cha, chx, chc, ar, ai, xr, xi ) \ + bli_tscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + ar, \ + PASTEMAC(PASTEMAC(cha,prec),neg)( \ + ai ), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chc,prec) \ + ) + +// -- 1e / 1r -- + +// scal1es +#define bli_tscal1es( cha, chx, chc, a, xir, xri ) \ + bli_tscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + PASTEMAC(cha,real)(a), \ + PASTEMAC(cha,imag)(a), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(xri), \ + PASTEMAC(chx,imag)(xri), \ + PASTEMAC(chc,prec) \ + ); \ + bli_tcopyims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(PASTEMAC(chx,prec),neg)( \ + PASTEMAC(chx,imag)(xri) \ + ), \ + PASTEMAC(chx,real)(xri), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(xir), \ + PASTEMAC(chx,imag)(xir) \ + ) + +// scal1es +#define bli_tscal1rs( cha, chx, chc, a, xr, xi ) \ + bli_tscalims \ + ( \ + PASTEMAC(cha,dom), \ + PASTEMAC(cha,prec), \ + 
PASTEMAC(cha,real)(a), \ + PASTEMAC(cha,imag)(a), \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chc,prec) \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- mxn_uplo -- + +#define bli_tscalris_mxn_uplo( cha, chx, chc, uplo, diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \ +{ \ + if ( bli_is_upper( uplo ) ) \ + { \ + for ( dim_t jj = 0; jj < n; ++jj ) \ + for ( dim_t ii = 0; ii < m; ++ii ) \ + { \ + if ( (doff_t)jj - (doff_t)ii >= diagoff ) \ + { \ + PASTEMAC(chx,ctyper)* restrict xij_r = (xr) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chx,ctyper)* restrict xij_i = (xi) + ii*(rs_x) + jj*(cs_x); \ + (void)xij_i; \ +\ + bli_tscalris( cha,chx,chc, *(ar), *(ai), *xij_r, *xij_i ); \ + } \ + } \ + } \ + else \ + { \ + for ( dim_t jj = 0; jj < n; ++jj ) \ + for ( dim_t ii = 0; ii < m; ++ii ) \ + { \ + if ( (doff_t)jj - (doff_t)ii <= diagoff ) \ + { \ + PASTEMAC(chx,ctyper)* restrict xij_r = (xr) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chx,ctyper)* restrict xij_i = (xi) + ii*(rs_x) + jj*(cs_x); \ + (void)xij_i; \ +\ + bli_tscalris( cha,chx,chc, *(ar), *(ai), *xij_r, *xij_i ); \ + } \ + } \ + } \ +} + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r +// (xr) := (ar) * (xr) - 0 * 0 ; +// (xi) xx 0 * (xr) + (ar) * 0 ; + +// r c +// (xr) := (ar) * (xr) - (ai) * 0 ; +// (xi) xx (ai) * (xr) + (ar) * 0 ; + +// c r +// (xr) := (ar) * (xr) - 0 * (xi); +// (xi) := 0 * (xr) + (ar) * (xi); + +// c c +// (xr) := (ar) * (xr) - (ai) * (xi); +// (xi) := (ai) * (xr) + (ar) * (xi); + +#endif + diff --git a/frame/include/level0/bli_tsets.h b/frame/include/level0/bli_tsets.h new file mode 100644 index 0000000000..a97b70379f --- /dev/null +++ b/frame/include/level0/bli_tsets.h @@ -0,0 +1,294 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2021, Southern Methodist University + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_TSETS_H +#define BLIS_TSETS_H + +// -- Implementation macros ---------------------------------------------------- + +#define bli_tsetims( \ + dx, px, xr, xi, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(dy,assigns) \ + ( \ + PASTEMAC(px,py,tcast)(xr), \ + PASTEMAC(px,py,tcast)(xi), \ + yr, \ + yi \ + ); \ +} + +#define bli_tsetrims( \ + px, xr, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(dy,assigns) \ + ( \ + PASTEMAC(px,py,tcast)(xr), \ + yi, \ + yr, \ + yi \ + ); \ +} + +#define bli_tsetiims( \ + px, xi, \ + dy, py, yr, yi \ + ) \ +{ \ + PASTEMAC(dy,assigns) \ + ( \ + yr, \ + PASTEMAC(px,py,tcast)(xi), \ + yr, \ + yi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tsets +#define bli_tsets( chx,chy, xr, xi, y ) \ + bli_tsetims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) + +// tsetrs +#define bli_tsetrs( chx,chy, xr, y ) \ + bli_tsetrims \ + ( \ + PASTEMAC(chx,prec), \ + xr, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) + +// tsetis +#define bli_tsetis( chx,chy, xi, y ) \ + bli_tsetiims \ + ( \ + PASTEMAC(chx,prec), \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y) \ + ) + +// -- Exposed real/imaginary -- + +// tsetris +#define bli_tsetris( chx,chy, xr, xi, yr, yi ) \ + bli_tsetims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi \ + ) + +// -- Set to constant -- + +// tset0s +#define bli_tset0s( chy, y ) \ + bli_tsets \ + ( \ + chy,chy, \ + PASTEMAC \ + ( \ + PASTEMAC(chy,prec), \ + zero \ + ), \ + PASTEMAC \ + ( \ + PASTEMAC(chy,prec), \ + zero \ + ), \ + y \ + ) + +// tset1s +#define bli_tset1s( chy, y ) \ + bli_tsets \ + ( \ + chy,chy, \ + 
PASTEMAC \ + ( \ + PASTEMAC(chy,prec), \ + one \ + ), \ + PASTEMAC \ + ( \ + PASTEMAC(chy,prec), \ + zero \ + ), \ + y \ + ) + +// tsetr0s +#define bli_tsetr0s( chy, y ) \ + bli_tsetrs( chy,chy, PASTEMAC(PASTEMAC(chy,prec),zero), y ) + +// tseti0s +#define bli_tseti0s( chy, y ) \ + bli_tsetis( chy,chy, PASTEMAC(PASTEMAC(chy,prec),zero), y ) + +// tset0ris +#define bli_tset0ris( chy, yr, yi ) \ + bli_tsetris \ + ( \ + chy,chy, \ + PASTEMAC \ + ( \ + PASTEMAC(chy,prec), \ + zero \ + ), \ + PASTEMAC \ + ( \ + PASTEMAC(chy,prec), \ + zero \ + ), \ + yr, \ + yi \ + ) + +// -- Micro-tile -- + +// set0s_mxn +#define bli_tset0s_mxn( chy, m, n, y, rs_y, cs_y ) \ +{ \ + for ( dim_t _j = 0; _j < (n); ++_j ) \ + for ( dim_t _i = 0; _i < (m); ++_i ) \ + bli_tset0s( chy, *((y) + _i*(rs_y) + _j*(cs_y)) ); \ +} + +// set0bbs_mxn +#define bli_tset0bbs_mxn( chy, m, n, y, incy, ldy ) \ +{ \ + /* Assume that the duplication factor is the row stride of y. */ \ + const dim_t _d = incy; \ + const dim_t _ds_y = 1; \ +\ + for ( dim_t _j = 0; _j < (n); ++_j ) \ + { \ + PASTEMAC(chy,ctype)* restrict yj = (PASTEMAC(chy,ctype)*)(y) + _j*(ldy); \ +\ + for ( dim_t _i = 0; _i < (m); ++_i ) \ + { \ + PASTEMAC(chy,ctype)* restrict yij = yj + _i*(incy); \ +\ + for ( dim_t _p = 0; _p < _d; ++_p ) \ + { \ + PASTEMAC(chy,ctype)* restrict yijd = yij + _p*_ds_y; \ +\ + bli_tset0s( chy, *yijd ); \ + } \ + } \ + } \ +} + +// bcastbbs_mxn +#define bli_tbcastbbs_mxn( chy, m, n, y, incy, ldy ) \ +{ \ + /* Assume that the duplication factor is the row stride of y. 
*/ \ + const dim_t _d = incy; \ + const dim_t _ds_y = 1; \ +\ + for ( dim_t _j = 0; _j < (n); ++_j ) \ + { \ + PASTEMAC(chy,ctype)* restrict yj = (PASTEMAC(chy,ctype)*)(y) + _j*(ldy); \ +\ + for ( dim_t _i = 0; _i < (m); ++_i ) \ + { \ + PASTEMAC(chy,ctyper)* restrict yij_r = (PASTEMAC(chy,ctyper)*)( (PASTEMAC(chy,ctype)*)yj + _i*(incy) ); \ + PASTEMAC(chy,ctyper)* restrict yij_i = yij_r + (incy); \ +\ + for ( dim_t _p = 1; _p < _d; ++_p ) \ + { \ + PASTEMAC(chy,ctyper)* restrict yijd_r = yij_r + _p*_ds_y; \ + PASTEMAC(chy,ctyper)* restrict yijd_i = yij_i + _p*_ds_y; (void)yijd_i; \ +\ + bli_tcopyris( chy,chy, *yij_r, *yij_i, *yijd_r, *yijd_i ); \ + } \ + } \ + } \ +} + +#define bli_tset0s_edge( chp, i, m, j, n, p, ldp ) \ +{ \ + if ( (i) < (m) ) \ + { \ + bli_tset0s_mxn \ + ( \ + chp, \ + m - i, \ + j, \ + p + (i)*1, 1, ldp \ + ); \ + } \ +\ + if ( (j) < (n) ) \ + { \ + bli_tset0s_mxn \ + ( \ + chp, \ + m, \ + n - j, \ + p + (j)*(ldp), 1, ldp \ + ); \ + } \ +} + +#endif + +// -- Notes -------------------------------------------------------------------- diff --git a/frame/include/level0/bli_tsqrt2s.h b/frame/include/level0/bli_tsqrt2s.h new file mode 100644 index 0000000000..9576683d13 --- /dev/null +++ b/frame/include/level0/bli_tsqrt2s.h @@ -0,0 +1,195 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_TSQRT2S_H +#define BLIS_TSQRT2S_H + +// -- Implementation macro ----------------------------------------------------- + +#define bli_tsqrt2ims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi, \ + chc \ + ) \ +{ \ + PASTEMAC(dx,sqrtims) \ + ( \ + dx, px, xr, xi, \ + dy, py, yr, yi, \ + chc \ + ); \ +} + +// -- real domain implementation -- + +// yr = sqrt( xr ); + +#define bli_rsqrtims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi, \ + chc \ + ) \ +{ \ + PASTEMAC(dy,assigns) \ + ( \ + PASTEMAC(chc,sqrt)( \ + PASTEMAC(px,chc,tcast)( xr ) \ + ), \ + PASTEMAC(py,zero), \ + yr, \ + yi \ + ); \ +} + +// -- complex domain implementation -- + +// mag = hypot( xr, xi ); +// tr = sqrt( ( mag + xr ) / 2.0 ); +// ti = sqrt( ( mag - xr ) / 2.0 ); +// yr = tr; +// yi = copysign( ti, xi ); + +#define bli_csqrtims( \ + \ + dx, px, xr, xi, \ + dy, py, yr, yi, \ + chc \ + ) \ +{ \ + PASTEMAC(ro,declinits) \ + ( \ + chc, \ + PASTEMAC(chc,hypot)( \ + PASTEMAC(px,chc,tcast)( xr ), \ + 
PASTEMAC(px,chc,tcast)( xi ) \ + ), \ + mag \ + ) \ + PASTEMAC(c,declinits) \ + ( \ + chc, \ + PASTEMAC(chc,sqrt)( \ + PASTEMAC(chc,div)( \ + PASTEMAC(chc,add)( \ + mag, \ + PASTEMAC(px,chc,tcast)( xr ) \ + ), \ + PASTEMAC(chc,two) \ + ) \ + ), \ + PASTEMAC(chc,sqrt)( \ + PASTEMAC(chc,div)( \ + PASTEMAC(chc,sub)( \ + mag, \ + PASTEMAC(px,chc,tcast)( xr ) \ + ), \ + PASTEMAC(chc,two) \ + ) \ + ), \ + tr, \ + ti \ + ) \ + PASTEMAC(dy,assigns) \ + ( \ + PASTEMAC(chc,py,tcast)( tr ), \ + PASTEMAC(chc,py,tcast)( \ + PASTEMAC(chc,copysign)( ti, xi ) \ + ), \ + yr, \ + yi \ + ); \ +} + +// -- API macros --------------------------------------------------------------- + +// -- Consolidated -- + +// tsqrt2s +#define bli_tsqrt2s( chx, chy, chc, x, y ) \ + bli_tsqrt2ims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + PASTEMAC(chx,real)(x), \ + PASTEMAC(chx,imag)(x), \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + PASTEMAC(chy,real)(y), \ + PASTEMAC(chy,imag)(y), \ + PASTEMAC(chc,prec) \ + ) + +// -- Exposed real/imaginary -- + +// tsqrt2ris +#define bli_tsqrt2ris( chx, chy, chc, xr, xi, yr, yi ) \ + bli_tsqrt2ims \ + ( \ + PASTEMAC(chx,dom), \ + PASTEMAC(chx,prec), \ + xr, \ + xi, \ + PASTEMAC(chy,dom), \ + PASTEMAC(chy,prec), \ + yr, \ + yi, \ + PASTEMAC(chc,prec) \ + ) + +// -- Higher-level static functions -------------------------------------------- + +// -- Notes -------------------------------------------------------------------- + +// -- Domain cases -- + +// r r +// (yr) := sqrt(xr); +// (yi) xx 0 ; + +// r c +// (yr) := sqrt(xr ...); +// (yi) xx copysign(xi ...); + +// c r +// (yr) := sqrt(xr); +// (yi) := 0 ; + +// c c +// (yr) := sqrt(xr ... 
); +// (yi) := copysign(xi ...); + +#endif + diff --git a/frame/include/level0/bli_tsubs.h b/frame/include/level0/bli_tsubs.h new file mode 100644 index 0000000000..7563bc561e --- /dev/null +++ b/frame/include/level0/bli_tsubs.h @@ -0,0 +1,161 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+#ifndef BLIS_TSUBS_H
+#define BLIS_TSUBS_H
+
+// -- Implementation macro -----------------------------------------------------
+
+// (yr) = (yr) - (xr);
+// (yi) = (yi) - (xi);
+// px/py are the precision tokens of x and y; both operands are cast to chc, subtracted there, and the result is cast back to py. dy selects the assigns variant; dx is not referenced.
+#define bli_tsubims( \
+          \
+          dx, px, xr, xi, \
+          dy, py, yr, yi, \
+          chc  \
+        ) \
+{ \
+    PASTEMAC(dy,assigns) \
+    ( \
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,sub)( \
+          PASTEMAC(py,chc,tcast)(yr), \
+          PASTEMAC(px,chc,tcast)(xr)  \
+        ) \
+      ),\
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,sub)( \
+          PASTEMAC(py,chc,tcast)(yi), \
+          PASTEMAC(px,chc,tcast)(xi)  \
+        ) \
+      ), \
+      yr, \
+      yi \
+    ); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// tsubs: y := y - x
+#define bli_tsubs( chx, chy, chc, x, y ) \
+    bli_tsubims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(chx,imag)(x), \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec)  \
+    )
+
+// tsubjs: y := y - conj(x) (the imaginary part of x is negated before the call)
+#define bli_tsubjs( chx, chy, chc, x, y ) \
+    bli_tsubims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        PASTEMAC(chx,imag)(x) \
+      ), \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec)  \
+    )
+
+// -- Exposed real/imaginary --
+
+// tsubris: as tsubs, with x and y given as separate real/imaginary operands
+#define bli_tsubris( chx, chy, chc, xr, xi, yr, yi ) \
+    bli_tsubims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      xr, \
+      xi, \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec)  \
+    )
+
+// tsubjris: as tsubjs, with x and y given as separate real/imaginary operands
+#define bli_tsubjris( chx, chy, chc, xr, xi, yr, yi ) \
+    bli_tsubims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      xr, \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        xi ), \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec)  \
+    )
+
+// -- Higher-level static functions --------------------------------------------
+
+// -- Notes --------------------------------------------------------------------
+
+// -- Domain cases --
+
+//   r      r
+// (yr) -= (xr);
+// (yi) xx   0 ;
+
+//   r      c
+// (yr) -= (xr);
+// (yi) xx (xi);
+
+//   c      r
+// (yr) -= (xr);
+// (yi) -=   0 ;
+
+//   c      c
+// (yr) -= (xr);
+// (yi) -= (xi);
+
+#endif
+
diff --git a/frame/include/level0/bli_tswaps.h b/frame/include/level0/bli_tswaps.h
new file mode 100644
index 0000000000..85fb24bb4e
--- /dev/null
+++ b/frame/include/level0/bli_tswaps.h
@@ -0,0 +1,146 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2014, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef BLIS_TSWAPS_H
+#define BLIS_TSWAPS_H
+
+// -- Implementation macro -----------------------------------------------------
+
+// (tr) := (yr);
+// (ti) := (yi);
+// (yr) := (xr);
+// (yi) := (xi);
+// (xr) := (tr);
+// (xi) := (ti);
+// The temporary (tr, ti) is declared in precision py; x is cast px -> py on the way into y, and the temporary is cast py -> px on the way into x.
+#define bli_tswapims( \
+          \
+          dx, px, xr, xi, \
+          dy, py, yr, yi  \
+        ) \
+{ \
+    /* It is important to use dx (or, alternatively, 'c') in the declinits macro
+       so that in the case of chy chx = r c, ti gets set to zero. The c r case
+       also works since ti, while unset by declinits, is unused by assigns. */ \
+    PASTEMAC(dx,declinits)( py, yr, yi, tr, ti ) \
+    PASTEMAC(dy,assigns) \
+    ( \
+      PASTEMAC(px,py,tcast)(xr),\
+      PASTEMAC(px,py,tcast)(xi), \
+      yr, \
+      yi \
+    ); \
+    PASTEMAC(dx,assigns) \
+    ( \
+      PASTEMAC(py,px,tcast)(tr),\
+      PASTEMAC(py,px,tcast)(ti), \
+      xr, \
+      xi \
+    ); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// tswaps: exchange x and y, each value cast to the other operand's precision
+#define bli_tswaps( chx, chy, x, y ) \
+    bli_tswapims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(chx,imag)(x), \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y)  \
+    )
+
+// -- Exposed real/imaginary --
+
+// tswapris: as tswaps, with x and y given as separate real/imaginary operands
+#define bli_tswapris( chx, chy, xr, xi, yr, yi ) \
+    bli_tswapims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      xr, \
+      xi, \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi \
+    )
+
+// -- Higher-level static functions --------------------------------------------
+
+// -- Notes --------------------------------------------------------------------
+
+// -- Domain cases --
+
+// chy chx: r r
+// (tr) := (yr);
+// (ti) :=   0 ;
+// (yr) := (xr);
+// (yi) xx (xi);
+// (xr) := (tr);
+// (xi) xx (ti);
+
+// chy chx: r c
+// (tr) := (yr);
+// (ti) :=   0 ;
+// (yr) := (xr);
+// (yi) xx (xi);
+// (xr) := (tr);
+// (xi) := (ti);
+
+// chy chx: c r
+// (tr) := (yr);
+// (ti) xx (yi);
+// (yr) := (xr);
+// (yi) :=   0 ;
+// (xr) := (tr);
+// (xi) xx (ti);
+
+// chy chx: c c
+// (tr) := (yr);
+// (ti) := (yi);
+// (yr) := (xr);
+// (yi) := (xi);
+// (xr) := (tr);
+// (xi) := (ti);
+
+#endif
+
diff --git a/frame/include/level0/bli_txpbys.h b/frame/include/level0/bli_txpbys.h
new file mode 100644
index 0000000000..8512a6e799
--- /dev/null
+++ b/frame/include/level0/bli_txpbys.h
@@ -0,0 +1,325 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+#ifndef BLIS_TXPBYS_H
+#define BLIS_TXPBYS_H
+
+// -- Implementation macro -----------------------------------------------------
+
+// (yr) := (xr) + (br) * (yr) - (bi) * (yi);
+// (yi) := (xi) + (bi) * (yr) + (br) * (yi);
+// All arithmetic is done in precision chc; the result is staged in py-precision temporaries tr/ti before being assigned to y.
+#define bli_txpbyims( \
+          \
+          dx, px, xr, xi, \
+          db, pb, br, bi, \
+          dy, py, yr, yi, \
+          chc  \
+        ) \
+{ \
+    PASTEMAC(c,declinits) \
+    ( \
+      py, \
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(px,chc,tcast)(xr), \
+          PASTEMAC(chc,sub)( \
+            PASTEMAC(db,dy,termrr)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pb,chc,tcast)(br), \
+                PASTEMAC(py,chc,tcast)(yr)  \
+              ) \
+            ), \
+            PASTEMAC(db,dy,termii)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pb,chc,tcast)(bi), \
+                PASTEMAC(py,chc,tcast)(yi)  \
+              ) \
+            ) \
+          ) \
+        ) \
+      ),\
+      PASTEMAC(chc,py,tcast)( \
+        PASTEMAC(chc,add)( \
+          PASTEMAC(px,chc,tcast)(xi), \
+          PASTEMAC(chc,add)( \
+            PASTEMAC(db,dy,termir)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pb,chc,tcast)(bi), \
+                PASTEMAC(py,chc,tcast)(yr)  \
+              ) \
+            ), \
+            PASTEMAC(db,dy,termri)( \
+              chc, \
+              PASTEMAC(chc,mul)( \
+                PASTEMAC(pb,chc,tcast)(br), \
+                PASTEMAC(py,chc,tcast)(yi)  \
+              ) \
+            ) \
+          ) \
+        ) \
+      ), \
+      tr, \
+      ti \
+    ) \
+    PASTEMAC(dy,assigns) \
+    ( \
+      tr, \
+      ti, \
+      yr, \
+      yi \
+    ); \
+}
+
+// -- API macros ---------------------------------------------------------------
+
+// -- Consolidated --
+
+// txpbys: y := x + b * y
+#define bli_txpbys( chx, chb, chy, chc, x, b, y ) \
+    bli_txpbyims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(chx,imag)(x), \
+      PASTEMAC(chb,dom),  \
+      PASTEMAC(chb,prec), \
+      PASTEMAC(chb,real)(b), \
+      PASTEMAC(chb,imag)(b), \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec)  \
+    )
+
+// txpbyjs: y := conj(x) + b * y (the imaginary part of x is negated before the call)
+#define bli_txpbyjs( chx, chb, chy, chc, x, b, y ) \
+    bli_txpbyims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      PASTEMAC(chx,real)(x), \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        PASTEMAC(chx,imag)(x) \
+      ), \
+      PASTEMAC(chb,dom),  \
+      PASTEMAC(chb,prec), \
+      PASTEMAC(chb,real)(b), \
+      PASTEMAC(chb,imag)(b), \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      PASTEMAC(chy,real)(y), \
+      PASTEMAC(chy,imag)(y), \
+      PASTEMAC(chc,prec)  \
+    )
+
+// -- Exposed real/imaginary --
+
+// txpbyris: as txpbys, with x, b and y given as separate real/imaginary operands
+#define bli_txpbyris( chx, chb, chy, chc, xr, xi, br, bi, yr, yi ) \
+    bli_txpbyims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      xr, \
+      xi, \
+      PASTEMAC(chb,dom),  \
+      PASTEMAC(chb,prec), \
+      br, \
+      bi, \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec)  \
+    )
+
+// txpbyjris: as txpbyjs, with x, b and y given as separate real/imaginary operands
+#define bli_txpbyjris( chx, chb, chy, chc, xr, xi, br, bi, yr, yi ) \
+    bli_txpbyims \
+    ( \
+      PASTEMAC(chx,dom),  \
+      PASTEMAC(chx,prec), \
+      xr, \
+      PASTEMAC(PASTEMAC(chx,prec),neg)( \
+        xi ), \
+      PASTEMAC(chb,dom),  \
+      PASTEMAC(chb,prec), \
+      br, \
+      bi, \
+      PASTEMAC(chy,dom),  \
+      PASTEMAC(chy,prec), \
+      yr, \
+      yi, \
+      PASTEMAC(chc,prec)  \
+    )
+
+// -- Higher-level static functions --------------------------------------------
+
+// -- mxn --
+
+// txpbys_mxn: y := x + beta * y over an m x n operand; x is only read. Size and pointer arguments are parenthesized.
+#define bli_txpbys_mxn( chx, chb, chy, chc, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
+{ \
+\
+    /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+    if ( bli_teq0s( chb, *(beta) ) ) \
+    { \
+        bli_tcopys_mxn( chx, chy, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
+    } \
+    else \
+    { \
+        for ( dim_t jj = 0; jj < (n); ++jj ) \
+        for ( dim_t ii = 0; ii < (m); ++ii ) \
+        { \
+            const PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+            PASTEMAC(chy,ctype)*       restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+            bli_txpbys( chx,chb,chy,chc, *xij, *(beta), *yij ); \
+        } \
+    } \
+}
+
+// txpbys_mxn_uplo: as txpbys_mxn, restricted to elements with jj - ii >= diagoff (upper) or jj - ii <= diagoff (otherwise)
+#define bli_txpbys_mxn_uplo( chx, chb, chy, chc, diagoff, uplo, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
+{ \
+    if ( bli_is_upper( uplo ) ) \
+    { \
+        /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+        if ( bli_teq0s( chb, *(beta) ) ) \
+        { \
+            for ( dim_t jj = 0; jj < (n); ++jj ) \
+            for ( dim_t ii = 0; ii < (m); ++ii ) \
+            { \
+                if ( (doff_t)jj - (doff_t)ii >= (diagoff) ) \
+                { \
+                    const PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+                    PASTEMAC(chy,ctype)*       restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+                    bli_tcopys( chx,chy, *xij, *yij ); \
+                } \
+            } \
+        } \
+        else \
+        { \
+            for ( dim_t jj = 0; jj < (n); ++jj ) \
+            for ( dim_t ii = 0; ii < (m); ++ii ) \
+            { \
+                if ( (doff_t)jj - (doff_t)ii >= (diagoff) ) \
+                { \
+                    const PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+                    PASTEMAC(chy,ctype)*       restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+                    bli_txpbys( chx,chb,chy,chc, *xij, *(beta), *yij ); \
+                } \
+            } \
+        } \
+    } \
+    else /* if ( bli_is_lower( uplo ) ) */ \
+    { \
+        /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+        if ( bli_teq0s( chb, *(beta) ) ) \
+        { \
+            for ( dim_t jj = 0; jj < (n); ++jj ) \
+            for ( dim_t ii = 0; ii < (m); ++ii ) \
+            { \
+                if ( (doff_t)jj - (doff_t)ii <= (diagoff) ) \
+                { \
+                    const PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+                    PASTEMAC(chy,ctype)*       restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+                    bli_tcopys( chx,chy, *xij, *yij ); \
+                } \
+            } \
+        } \
+        else \
+        { \
+            for ( dim_t jj = 0; jj < (n); ++jj ) \
+            for ( dim_t ii = 0; ii < (m); ++ii ) \
+            { \
+                if ( (doff_t)jj - (doff_t)ii <= (diagoff) ) \
+                { \
+                    const PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \
+                    PASTEMAC(chy,ctype)*       restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \
+\
+                    bli_txpbys( chx,chb,chy,chc, *xij, *(beta), *yij ); \
+                } \
+            } \
+        } \
+    } \
+}
+
+// -- Notes --------------------------------------------------------------------
+
+// -- Domain cases --
+
+//   r       r      r
+// (yr) := (xr) + (br) * (yr) -   0  *   0 ;
+// (yi) xx   0  +   0  * (yr) + (br) *   0 ;
+
+//   r       r      c
+// (yr) := (xr) + (br) * (yr) - (bi) *   0 ;
+// (yi) xx   0  + (bi) * (yr) + (br) *   0 ;
+
+//   r       c      r
+// (yr) := (xr) + (br) * (yr) -   0  *   0 ;
+// (yi) xx (xi) + 0 * (yr) + (br) * 0 ; + +// r c c +// (yr) := (xr) + (br) * (yr) - (bi) * 0 ; +// (yi) xx (xi) + (bi) * (yr) + (br) * 0 ; + +// c r r +// (yr) := (xr) + (br) * (yr) - 0 * (yi); +// (yi) := 0 + 0 * (yr) + (br) * (yi); + +// c r c +// (yr) := (xr) + (br) * (yr) - (bi) * (yi); +// (yi) := 0 + (bi) * (yr) + (br) * (yi); + +// c c r +// (yr) := (xr) + (br) * (yr) - 0 * (yi); +// (yi) := (xi) + 0 * (yr) + (br) * (yi); + +// c c c +// (yr) := (xr) + (br) * (yr) - (bi) * (yi); +// (yi) := (xi) + (bi) * (yr) + (br) * (yi); + +#endif + diff --git a/frame/include/level0/bli_xpbyjs.h b/frame/include/level0/bli_xpbyjs.h deleted file mode 100644 index 3126237376..0000000000 --- a/frame/include/level0/bli_xpbyjs.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_XPBYJS_H -#define BLIS_XPBYJS_H - -// xpbyjs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of b. -// - The third char encodes the type of y. - -// -- (xby) = (??s) ------------------------------------------------------------ - -#define bli_sssxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dssxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cssxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zssxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ddsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cdsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zdsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_scsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), 
bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dcsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ccsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zcsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_szsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) - -// -- (xby) = (??d) ------------------------------------------------------------ - -#define bli_ssdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dsdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_csdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zsdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_sddxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dddxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cddxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), 
bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zddxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_scdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dcdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ccdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zcdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_szdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (xby) = (??c) ------------------------------------------------------------ - -#define bli_sscxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dscxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cscxpbyjs( x, b, y ) bli_crxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zscxpbyjs( x, b, y ) bli_crxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_sdcxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), 
bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_ddcxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cdcxpbyjs( x, b, y ) bli_crxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zdcxpbyjs( x, b, y ) bli_crxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_sccxpbyjs( x, b, y ) bli_cxxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dccxpbyjs( x, b, y ) bli_cxxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cccxpbyjs( x, b, y ) bli_cxxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zccxpbyjs( x, b, y ) bli_cxxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_szcxpbyjs( x, b, y ) bli_cxxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzcxpbyjs( x, b, y ) bli_cxxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czcxpbyjs( x, b, y ) bli_cxxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzcxpbyjs( x, b, y ) bli_cxxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) - -// -- (xby) = (??z) ------------------------------------------------------------ - -#define bli_sszxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dszxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cszxpbyjs( x, b, y ) bli_crxpbyjris( bli_creal(x), bli_cimag(x), 
bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zszxpbyjs( x, b, y ) bli_crxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_sdzxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ddzxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzxpbyjs( x, b, y ) bli_crxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzxpbyjs( x, b, y ) bli_crxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_sczxpbyjs( x, b, y ) bli_cxxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dczxpbyjs( x, b, y ) bli_cxxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cczxpbyjs( x, b, y ) bli_cxxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zczxpbyjs( x, b, y ) bli_cxxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_szzxpbyjs( x, b, y ) bli_cxxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzxpbyjs( x, b, y ) bli_cxxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czzxpbyjs( x, b, y ) bli_cxxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzxpbyjs( x, b, y ) bli_cxxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (xby) = (??c) ------------------------------------------------------------ - -#define bli_sscxpbyjs( x, b, y ) { (y) = (x) + (b) * 
(y); } -#define bli_dscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_sdcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_ddcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cdcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zdcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_sccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_szcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dzcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_czcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zzcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } - -// -- (xby) = (??z) ------------------------------------------------------------ - -#define bli_sszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_sdzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_ddzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cdzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zdzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_sczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_szzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dzzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_czzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zzzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } - 
-#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sxpbyjs( x, b, y ) bli_sssxpbyjs( x, b, y ) -#define bli_dxpbyjs( x, b, y ) bli_dddxpbyjs( x, b, y ) -#define bli_cxpbyjs( x, b, y ) bli_cccxpbyjs( x, b, y ) -#define bli_zxpbyjs( x, b, y ) bli_zzzxpbyjs( x, b, y ) - - -#endif - diff --git a/frame/include/level0/bli_xpbys.h b/frame/include/level0/bli_xpbys.h deleted file mode 100644 index f57fec76c4..0000000000 --- a/frame/include/level0/bli_xpbys.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_XPBYS_H -#define BLIS_XPBYS_H - -// xpbys - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of b. -// - The third char encodes the type of y. - -// -- (xby) = (??s) ------------------------------------------------------------ - -#define bli_sssxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dssxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cssxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zssxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_sdsxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ddsxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_cdsxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zdsxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_scsxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), 
bli_sreal(y), bli_simag(y) ) -#define bli_dcsxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_ccsxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zcsxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) - -#define bli_szsxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_dzsxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_czsxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) -#define bli_zzsxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) - -// -- (xby) = (??d) ------------------------------------------------------------ - -#define bli_ssdxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dsdxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_csdxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zsdxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_sddxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dddxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_cddxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) -#define 
bli_zddxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_scdxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dcdxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_ccdxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zcdxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) - -#define bli_szdxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_dzdxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_czdxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) -#define bli_zzdxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) - -#ifndef BLIS_ENABLE_C99_COMPLEX - -// -- (xby) = (??c) ------------------------------------------------------------ - -#define bli_sscxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dscxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cscxpbys( x, b, y ) bli_crxpbyris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zscxpbys( x, b, y ) bli_crxpbyris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_sdcxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define 
bli_ddcxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cdcxpbys( x, b, y ) bli_crxpbyris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zdcxpbys( x, b, y ) bli_crxpbyris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_sccxpbys( x, b, y ) bli_cxxpbyris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dccxpbys( x, b, y ) bli_cxxpbyris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_cccxpbys( x, b, y ) bli_cxxpbyris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zccxpbys( x, b, y ) bli_cxxpbyris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) - -#define bli_szcxpbys( x, b, y ) bli_cxxpbyris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_dzcxpbys( x, b, y ) bli_cxxpbyris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_czcxpbys( x, b, y ) bli_cxxpbyris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) -#define bli_zzcxpbys( x, b, y ) bli_cxxpbyris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) - -// -- (xby) = (??z) ------------------------------------------------------------ - -#define bli_sszxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dszxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cszxpbys( x, b, y ) bli_crxpbyris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zszxpbys( x, b, y ) bli_crxpbyris( 
bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_sdzxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_ddzxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cdzxpbys( x, b, y ) bli_crxpbyris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zdzxpbys( x, b, y ) bli_crxpbyris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_sczxpbys( x, b, y ) bli_cxxpbyris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dczxpbys( x, b, y ) bli_cxxpbyris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_cczxpbys( x, b, y ) bli_cxxpbyris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zczxpbys( x, b, y ) bli_cxxpbyris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) - -#define bli_szzxpbys( x, b, y ) bli_cxxpbyris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_dzzxpbys( x, b, y ) bli_cxxpbyris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_czzxpbys( x, b, y ) bli_cxxpbyris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) -#define bli_zzzxpbys( x, b, y ) bli_cxxpbyris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - -// -- (xby) = (??c) ------------------------------------------------------------ - -#define bli_sscxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dscxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cscxpbys( x, b, y ) { (y) = (x) + (b) * (y); } 
-#define bli_zscxpbys( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_sdcxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_ddcxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cdcxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zdcxpbys( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_sccxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dccxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cccxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zccxpbys( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_szcxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dzcxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_czcxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zzcxpbys( x, b, y ) { (y) = (x) + (b) * (y); } - -// -- (xby) = (??z) ------------------------------------------------------------ - -#define bli_sszxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dszxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cszxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zszxpbys( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_sdzxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_ddzxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cdzxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zdzxpbys( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_sczxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dczxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_cczxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zczxpbys( x, b, y ) { (y) = (x) + (b) * (y); } - -#define bli_szzxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_dzzxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_czzxpbys( x, b, y ) { (y) = (x) + (b) * (y); } -#define bli_zzzxpbys( x, b, y ) { (y) = (x) + (b) * (y); } - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_sxpbys( x, b, y ) bli_sssxpbys( x, b, y ) -#define bli_dxpbys( x, b, y ) bli_dddxpbys( x, b, y ) -#define 
bli_cxpbys( x, b, y ) bli_cccxpbys( x, b, y ) -#define bli_zxpbys( x, b, y ) bli_zzzxpbys( x, b, y ) - - -#endif - diff --git a/frame/include/level0/bli_xpbys_mxn.h b/frame/include/level0/bli_xpbys_mxn.h deleted file mode 100644 index d3174289f6..0000000000 --- a/frame/include/level0/bli_xpbys_mxn.h +++ /dev/null @@ -1,830 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_XPBYS_MXN_H -#define BLIS_XPBYS_MXN_H - -// xpbys_mxn - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of b. -// - The third char encodes the type of y. -// - We only implement cases where typeof(b) == typeof(y). - -#undef BLIS_ENABLE_CR_CASES -#define BLIS_ENABLE_CR_CASES 0 - -// -- bli_???xpbys_mxn -- - -#undef GENTFUNC2 -#define GENTFUNC2( ctypex, ctypey, chx, chy, opname, kername ) \ -\ -BLIS_INLINE void PASTEMAC(chx,chy,chy,opname) \ - ( \ - const dim_t m, \ - const dim_t n, \ - const ctypex* x, inc_t rs_x, inc_t cs_x, \ - const ctypey* beta, \ - ctypey* y, inc_t rs_y, inc_t cs_y \ - ) \ -{ \ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ - if ( PASTEMAC(chy,eq0)( *beta ) ) \ - { \ - PASTEMAC(chx,chy,copys_mxn)( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ - return; \ - } \ -\ - if ( BLIS_ENABLE_CR_CASES && rs_x == 1 && rs_y == 1 ) \ - { \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - PASTEMAC(chx,chy,chy,kername) \ - ( \ - *(x + ii + jj*cs_x), *beta, \ - *(y + ii + jj*cs_y) \ - ); \ - } \ - else if ( BLIS_ENABLE_CR_CASES && cs_x == 1 && cs_y == 1 ) \ - { \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - PASTEMAC(chx,chy,chy,kername) \ - ( \ - *(x + ii*rs_x + jj), *beta, \ - *(y + ii*rs_y + jj) \ - ); \ - } \ - else \ - { \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - PASTEMAC(chx,chy,chy,kername) \ - ( \ - *(x + ii*rs_x + jj*cs_x), *beta, \ - *(y + ii*rs_y + jj*cs_y) \ - ); \ - } \ -} - -INSERT_GENTFUNC2_BASIC ( xpbys_mxn, xpbys ) -INSERT_GENTFUNC2_MIX_DP( xpbys_mxn, xpbys ) - - -// -- bli_?xpbys_mxn -- - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname ) \ -\ -BLIS_INLINE void PASTEMAC(ch,opname) \ - ( \ - const dim_t m, \ - const dim_t n, \ - const ctype* x, inc_t rs_x, inc_t cs_x, \ - const ctype* beta, \ - ctype* y, inc_t rs_y, inc_t cs_y \ - ) \ -{ \ - 
PASTEMAC(ch,ch,ch,opname)( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} - -INSERT_GENTFUNC_BASIC( xpbys_mxn ) - - - -#if 0 -// -- (xby) = (?ss) ------------------------------------------------------------ - -BLIS_INLINE void bli_sssxpbys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - const float* restrict beta, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). - if ( bli_seq0( *beta ) ) - { - bli_sscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sssxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_sssxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sssxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dssxpbys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - const float* restrict beta, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
- if ( bli_seq0( *beta ) ) - { - bli_dscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dssxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dssxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dssxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_cssxpbys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const float* restrict beta, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). - if ( bli_seq0( *beta ) ) - { - bli_cscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cssxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_cssxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cssxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zssxpbys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const float* restrict beta, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
- if ( bli_seq0( *beta ) ) - { - bli_zscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zssxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zssxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zssxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// -- (xby) = (?dd) ------------------------------------------------------------ - -BLIS_INLINE void bli_sddxpbys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - const double* restrict beta, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
- if ( bli_deq0( *beta ) ) - { - bli_sdcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sddxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_sddxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sddxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dddxpbys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - const double* restrict beta, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). - if ( bli_deq0( *beta ) ) - { - bli_ddcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dddxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dddxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dddxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_cddxpbys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const double* restrict beta, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
- if ( bli_deq0( *beta ) ) - { - bli_cdcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cddxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_cddxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cddxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zddxpbys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const double* restrict beta, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). - if ( bli_deq0( *beta ) ) - { - bli_zdcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zddxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zddxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zddxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// -- (xby) = (?cc) ------------------------------------------------------------ - -BLIS_INLINE void bli_sccxpbys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - const scomplex* restrict beta, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If 
beta is zero, overwrite y with x (in case y has infs or NaNs). - if ( bli_ceq0( *beta ) ) - { - bli_sccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sccxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_sccxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_sccxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dccxpbys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - const scomplex* restrict beta, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
- if ( bli_ceq0( *beta ) ) - { - bli_dccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dccxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dccxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dccxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_cccxpbys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const scomplex* restrict beta, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). - if ( bli_ceq0( *beta ) ) - { - bli_cccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cccxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_cccxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_cccxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zccxpbys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const scomplex* restrict beta, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
- if ( bli_ceq0( *beta ) ) - { - bli_zccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zccxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zccxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zccxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} - -// -- (xby) = (?zz) ------------------------------------------------------------ - -BLIS_INLINE void bli_szzxpbys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - const dcomplex* restrict beta, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
- if ( bli_zeq0( *beta ) ) - { - bli_szcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_szzxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_szzxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_szzxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_dzzxpbys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - const dcomplex* restrict beta, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). - if ( bli_zeq0( *beta ) ) - { - bli_dzcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dzzxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_dzzxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_dzzxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_czzxpbys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const dcomplex* restrict beta, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
- if ( bli_zeq0( *beta ) ) - { - bli_czcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_czzxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_czzxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_czzxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} -BLIS_INLINE void bli_zzzxpbys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const dcomplex* restrict beta, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - // If beta is zero, overwrite y with x (in case y has infs or NaNs). - if ( bli_zeq0( *beta ) ) - { - bli_zzcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); - return; - } - -#ifdef BLIS_ENABLE_CR_CASES - if ( rs_x == 1 && rs_y == 1 ) - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zzzxpbys( *(x + ii + jj*cs_x), *beta, - *(y + ii + jj*cs_y) ); - } - else if ( cs_x == 1 && cs_y == 1 ) - { - for ( dim_t ii = 0; ii < m; ++ii ) - for ( dim_t jj = 0; jj < n; ++jj ) - bli_zzzxpbys( *(x + ii*rs_x + jj), *beta, - *(y + ii*rs_y + jj) ); - } - else -#endif - { - for ( dim_t jj = 0; jj < n; ++jj ) - for ( dim_t ii = 0; ii < m; ++ii ) - bli_zzzxpbys( *(x + ii*rs_x + jj*cs_x), *beta, - *(y + ii*rs_y + jj*cs_y) ); - } -} - - - - -BLIS_INLINE void bli_sxpbys_mxn - ( - const dim_t m, - const dim_t n, - const float* restrict x, const inc_t rs_x, const inc_t cs_x, - const float* restrict beta, - float* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - bli_sssxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); -} -BLIS_INLINE void 
bli_dxpbys_mxn - ( - const dim_t m, - const dim_t n, - const double* restrict x, const inc_t rs_x, const inc_t cs_x, - const double* restrict beta, - double* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - bli_dddxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); -} -BLIS_INLINE void bli_cxpbys_mxn - ( - const dim_t m, - const dim_t n, - const scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const scomplex* restrict beta, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - bli_cccxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); -} -BLIS_INLINE void bli_zxpbys_mxn - ( - const dim_t m, - const dim_t n, - const dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - const dcomplex* restrict beta, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - bli_zzzxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); -} -#endif - - -#endif diff --git a/frame/include/level0/bli_xpbys_mxn_uplo.h b/frame/include/level0/bli_xpbys_mxn_uplo.h deleted file mode 100644 index 1c50a8cf4d..0000000000 --- a/frame/include/level0/bli_xpbys_mxn_uplo.h +++ /dev/null @@ -1,300 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_XPBYS_MXN_UPLO_H -#define BLIS_XPBYS_MXN_UPLO_H - -// xpbys_mxn_u - -#define bli_sssxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ - if ( bli_seq0( *beta ) ) \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_sscopys( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_sssxpbys( *(x + _i*rs_x + _j*cs_x), \ - *(beta), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ -} - -#define bli_dddxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). 
*/ \ - if ( bli_deq0( *beta ) ) \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_ddcopys( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_dddxpbys( *(x + _i*rs_x + _j*cs_x), \ - *(beta), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ -} - -#define bli_cccxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ - if ( bli_ceq0( *beta ) ) \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_cccopys( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_cccxpbys( *(x + _i*rs_x + _j*cs_x), \ - *(beta), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ -} - -#define bli_zzzxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). 
*/ \ - if ( bli_zeq0( *beta ) ) \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_zzcopys( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_zzzxpbys( *(x + _i*rs_x + _j*cs_x), \ - *(beta), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ -} - -// xpbys_mxn_l - -#define bli_sssxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ - if ( bli_seq0( *beta ) ) \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_sscopys( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_sssxpbys( *(x + _i*rs_x + _j*cs_x), \ - *(beta), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ -} - -#define bli_dddxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). 
*/ \ - if ( bli_deq0( *beta ) ) \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_ddcopys( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_dddxpbys( *(x + _i*rs_x + _j*cs_x), \ - *(beta), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ -} - -#define bli_cccxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ - if ( bli_ceq0( *beta ) ) \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_cccopys( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_cccxpbys( *(x + _i*rs_x + _j*cs_x), \ - *(beta), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ -} - -#define bli_zzzxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* If beta is zero, overwrite y with x (in case y has infs or NaNs). 
*/ \ - if ( bli_zeq0( *beta ) ) \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_zzcopys( *(x + _i*rs_x + _j*cs_x), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else \ - { \ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_zzzxpbys( *(x + _i*rs_x + _j*cs_x), \ - *(beta), \ - *(y + _i*rs_y + _j*cs_y) ); \ - } \ - } \ -} - - -#define bli_sxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{\ - bli_sssxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} -#define bli_dxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{\ - bli_dddxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} -#define bli_cxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{\ - bli_cccxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} -#define bli_zxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{\ - bli_zzzxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} -#define bli_sxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{\ - bli_sssxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} -#define bli_dxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{\ - bli_dddxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} -#define bli_cxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{\ - bli_cccxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} -#define bli_zxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ -{\ - bli_zzzxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ -} - -#endif diff --git a/frame/include/level0/old/bli_cast.h b/frame/include/level0/old/bli_cast.h deleted file mode 100644 index f54b9cd963..0000000000 --- a/frame/include/level0/old/bli_cast.h 
+++ /dev/null @@ -1,150 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CAST_H -#define BLIS_CAST_H - -// cast - -// Notes: -// - The first char encodes the type of *ap. -// - The second char encodes the type of b. 
- - -#define bli_sscast( ap, b ) \ -{ \ - (b) = ( float ) *(( float* )(ap)); \ -} -#define bli_dscast( ap, b ) \ -{ \ - (b) = ( float ) *(( double* )(ap)); \ -} -#define bli_cscast( ap, b ) \ -{ \ - (b) = ( float ) bli_creal( *(( scomplex* )(ap)) ); \ -} -#define bli_zscast( ap, b ) \ -{ \ - (b) = ( float ) bli_zreal( *(( dcomplex* )(ap)) ); \ -} - - -#define bli_sdcast( ap, b ) \ -{ \ - (b) = ( double ) *(( float* )(ap)); \ -} -#define bli_ddcast( ap, b ) \ -{ \ - (b) = ( double ) *(( double* )(ap)); \ -} -#define bli_cdcast( ap, b ) \ -{ \ - (b) = ( double ) bli_creal( *(( scomplex* )(ap)) ); \ -} -#define bli_zdcast( ap, b ) \ -{ \ - (b) = ( double ) bli_zreal( *(( dcomplex* )(ap)) ); \ -} - - -#ifndef BLIS_ENABLE_C99_COMPLEX - - -#define bli_sccast( ap, b ) \ -{ \ - bli_scsets( bli_sreal( *(( float* )(ap)) ), \ - 0.0, (b) ); \ -} -#define bli_dccast( ap, b ) \ -{ \ - bli_dcsets( bli_dreal( *(( double* )(ap)) ), \ - 0.0, (b) ); \ -} -#define bli_cccast( ap, b ) \ -{ \ - bli_ccsets( bli_creal( *(( scomplex* )(ap)) ), \ - bli_cimag( *(( scomplex* )(ap)) ), (b) ); \ -} -#define bli_zccast( ap, b ) \ -{ \ - bli_zcsets( bli_zreal( *(( dcomplex* )(ap)) ), \ - bli_zimag( *(( dcomplex* )(ap)) ), (b) ); \ -} - - -#define bli_szcast( ap, b ) \ -{ \ - bli_szsets( bli_sreal( *(( float* )(ap)) ), \ - 0.0, (b) ); \ -} -#define bli_dzcast( ap, b ) \ -{ \ - bli_dzsets( bli_dreal( *(( double* )(ap)) ), \ - 0.0, (b) ); \ -} -#define bli_czcast( ap, b ) \ -{ \ - bli_czsets( bli_creal( *(( scomplex* )(ap)) ), \ - bli_cimag( *(( scomplex* )(ap)) ), (b) ); \ -} -#define bli_zzcast( ap, b ) \ -{ \ - bli_zzsets( bli_zreal( *(( dcomplex* )(ap)) ), \ - bli_zimag( *(( dcomplex* )(ap)) ), (b) ); \ -} - - -#else // ifdef BLIS_ENABLE_C99_COMPLEX - - -#define bli_sccast( ap, b ) { (b) = ( scomplex ) *(( float* )(ap)); } -#define bli_dccast( ap, b ) { (b) = ( scomplex ) *(( double* )(ap)); } -#define bli_cccast( ap, b ) { (b) = ( scomplex ) *(( scomplex* )(ap)); } -#define bli_zccast( ap, b ) 
{ (b) = ( scomplex ) *(( dcomplex* )(ap)); } - -#define bli_szcast( ap, b ) { (b) = ( dcomplex ) *(( float* )(ap)); } -#define bli_dzcast( ap, b ) { (b) = ( dcomplex ) *(( double* )(ap)); } -#define bli_czcast( ap, b ) { (b) = ( dcomplex ) *(( scomplex* )(ap)); } -#define bli_zzcast( ap, b ) { (b) = ( dcomplex ) *(( dcomplex* )(ap)); } - - -#endif // BLIS_ENABLE_C99_COMPLEX - - -#define bli_scast( ap, b ) bli_sscast( ap, b ) -#define bli_dcast( ap, b ) bli_ddcast( ap, b ) -#define bli_ccast( ap, b ) bli_cccast( ap, b ) -#define bli_zcast( ap, b ) bli_zzcast( ap, b ) - -#endif diff --git a/frame/include/level0/old/bli_castfrom.h b/frame/include/level0/old/bli_castfrom.h deleted file mode 100644 index 52e6a98b8e..0000000000 --- a/frame/include/level0/old/bli_castfrom.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ diff --git a/frame/include/level0/old/bli_castto.h b/frame/include/level0/old/bli_castto.h deleted file mode 100644 index 52e6a98b8e..0000000000 --- a/frame/include/level0/old/bli_castto.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ diff --git a/frame/include/level0/old/bli_copynzjs.h b/frame/include/level0/old/bli_copynzjs.h deleted file mode 100644 index ce82ee1c77..0000000000 --- a/frame/include/level0/old/bli_copynzjs.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYNZJS_H -#define BLIS_COPYNZJS_H - -// copynzjs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. -// - x is copied in conjugated form. - -#define bli_sscopynzjs( x, y ) \ -{ \ - (y) = ( float ) (x); \ -} -#define bli_dscopynzjs( x, y ) \ -{ \ - (y) = ( float ) (x); \ -} -#define bli_cscopynzjs( x, y ) \ -{ \ - (y) = ( float ) (x).real; \ -} -#define bli_zscopynzjs( x, y ) \ -{ \ - (y) = ( float ) (x).real; \ -} - -#define bli_sdcopynzjs( x, y ) \ -{ \ - (y) = ( double ) (x); \ -} -#define bli_ddcopynzjs( x, y ) \ -{ \ - (y) = ( double ) (x); \ -} -#define bli_cdcopynzjs( x, y ) \ -{ \ - (y) = ( double ) (x).real; \ -} -#define bli_zdcopynzjs( x, y ) \ -{ \ - (y) = ( double ) (x).real; \ -} - -#define bli_sccopynzjs( x, y ) \ -{ \ - (y).real = ( float ) (x); \ - /* (y).imag = 0.0F; (SKIP COPYING OF ZERO) */ \ -} -#define bli_dccopynzjs( x, y ) \ -{ \ - (y).real = ( float ) (x); \ - /* (y).imag = 0.0F; (SKIP COPYING OF ZERO) */ \ -} -#define bli_cccopynzjs( x, y ) \ -{ \ - (y).real = ( float ) (x).real; \ - (y).imag = ( float ) -(x).imag; \ -} -#define bli_zccopynzjs( x, y ) \ -{ \ - (y).real = ( float ) (x).real; \ - (y).imag = ( float ) -(x).imag; \ -} - -#define bli_szcopynzjs( x, y ) \ -{ \ - (y).real = ( double ) (x); \ - /* (y).imag = 0.0; (SKIP COPYING OF ZERO) */ \ -} -#define bli_dzcopynzjs( x, y ) \ -{ \ - (y).real = ( double ) (x); \ - /* (y).imag = 0.0; (SKIP 
COPYING OF ZERO) */ \ -} -#define bli_czcopynzjs( x, y ) \ -{ \ - (y).real = ( double ) (x).real; \ - (y).imag = ( double ) -(x).imag; \ -} -#define bli_zzcopynzjs( x, y ) \ -{ \ - (y).real = ( double ) (x).real; \ - (y).imag = ( double ) -(x).imag; \ -} - - -#define bli_scopynzjs( x, y ) \ -{ \ - bli_sscopynzjs( x, y ); \ -} -#define bli_dcopynzjs( x, y ) \ -{ \ - bli_ddcopynzjs( x, y ); \ -} -#define bli_ccopynzjs( x, y ) \ -{ \ - bli_cccopynzjs( x, y ); \ -} -#define bli_zcopynzjs( x, y ) \ -{ \ - bli_zzcopynzjs( x, y ); \ -} - - -#endif diff --git a/frame/include/level0/old/bli_copynzs.h b/frame/include/level0/old/bli_copynzs.h deleted file mode 100644 index a8fe71a6cb..0000000000 --- a/frame/include/level0/old/bli_copynzs.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYNZS_H -#define BLIS_COPYNZS_H - -// copynzs - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of y. - -#define bli_sscopynzs( x, y ) \ -{ \ - (y) = ( float ) (x); \ -} -#define bli_dscopynzs( x, y ) \ -{ \ - (y) = ( float ) (x); \ -} -#define bli_cscopynzs( x, y ) \ -{ \ - (y) = ( float ) (x).real; \ -} -#define bli_zscopynzs( x, y ) \ -{ \ - (y) = ( float ) (x).real; \ -} - -#define bli_sdcopynzs( x, y ) \ -{ \ - (y) = ( double ) (x); \ -} -#define bli_ddcopynzs( x, y ) \ -{ \ - (y) = ( double ) (x); \ -} -#define bli_cdcopynzs( x, y ) \ -{ \ - (y) = ( double ) (x).real; \ -} -#define bli_zdcopynzs( x, y ) \ -{ \ - (y) = ( double ) (x).real; \ -} - -#define bli_sccopynzs( x, y ) \ -{ \ - (y).real = ( float ) (x); \ - /* (y).imag = 0.0F; (SKIP COPYING OF ZERO) */ \ -} -#define bli_dccopynzs( x, y ) \ -{ \ - (y).real = ( float ) (x); \ - /* (y).imag = 0.0F (SKIP COPYING OF ZERO) */; \ -} -#define bli_cccopynzs( x, y ) \ -{ \ - (y).real = ( float ) (x).real; \ - (y).imag = ( float ) (x).imag; \ -} -#define bli_zccopynzs( x, y ) \ -{ \ - (y).real = ( float ) (x).real; \ - (y).imag = ( float ) (x).imag; \ -} - -#define bli_szcopynzs( x, y ) \ -{ \ - (y).real = ( double ) (x); \ - /* (y).imag = 0.0; (SKIP COPYING OF ZERO) */ \ -} -#define bli_dzcopynzs( x, y ) \ -{ \ - (y).real = ( double ) (x); \ - /* (y).imag = 0.0; (SKIP COPYING OF ZERO) */ \ -} -#define bli_czcopynzs( x, y ) \ 
-{ \ - (y).real = ( double ) (x).real; \ - (y).imag = ( double ) (x).imag; \ -} -#define bli_zzcopynzs( x, y ) \ -{ \ - (y).real = ( double ) (x).real; \ - (y).imag = ( double ) (x).imag; \ -} - - -#define bli_scopynzs( x, y ) \ -{ \ - bli_sscopynzs( x, y ); \ -} -#define bli_dcopynzs( x, y ) \ -{ \ - bli_ddcopynzs( x, y ); \ -} -#define bli_ccopynzs( x, y ) \ -{ \ - bli_cccopynzs( x, y ); \ -} -#define bli_zcopynzs( x, y ) \ -{ \ - bli_zzcopynzs( x, y ); \ -} - - -#endif diff --git a/frame/include/level0/old/bli_invscalcjs.h b/frame/include/level0/old/bli_invscalcjs.h deleted file mode 100644 index 983d7622ce..0000000000 --- a/frame/include/level0/old/bli_invscalcjs.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_INVSCALCJS_H -#define BLIS_INVSCALCJS_H - -// invscalcjs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. - -#define bli_ssinvscalcjs( conj, a, x ) \ -{ \ - (x) /= ( float ) (a); \ -} -#define bli_dsinvscalcjs( conj, a, x ) \ -{ \ - (x) /= ( float ) (a); \ -} -#define bli_csinvscalcjs( conj, a, x ) \ -{ \ - (x) /= ( float ) (a).real; \ -} -#define bli_zsinvscalcjs( conj, a, x ) \ -{ \ - (x) /= ( float ) (a).real; \ -} - -#define bli_sdinvscalcjs( conj, a, x ) \ -{ \ - (x) /= ( double ) (a); \ -} -#define bli_ddinvscalcjs( conj, a, x ) \ -{ \ - (x) /= ( double ) (a); \ -} -#define bli_cdinvscalcjs( conj, a, x ) \ -{ \ - (x) /= ( double ) (a).real; \ -} -#define bli_zdinvscalcjs( conj, a, x ) \ -{ \ - (x) /= ( double ) (a).real; \ -} - -#define bli_scinvscalcjs( conj, a, x ) \ -{ \ - (x).real /= ( float ) (a); \ - (x).imag /= ( float ) (a); \ -} -#define bli_dcinvscalcjs( conj, a, x ) \ -{ \ - (x).real /= ( float ) (a); \ - (x).imag /= ( float ) (a); \ -} -#define bli_ccinvscalcjs( conj, a, x ) \ -{ \ - float aimag = ( bli_is_conj( conj ) ? 
( float ) -(a).imag : \ - ( float ) (a).imag ); \ - float temp = ( float ) (a).real * (a).real + ( float ) aimag * (a).imag; \ - float xr = ( float ) ( ( float ) (a).real * (x).real + ( float ) aimag * (x).imag ) / temp; \ - float xi = ( float ) ( ( float ) (a).real * (x).imag - ( float ) aimag * (x).real ) / temp; \ - (x).real = xr; \ - (x).imag = xi; \ -} -#define bli_zcinvscalcjs( conj, a, x ) \ -{ \ - float aimag = ( bli_is_conj( conj ) ? ( float ) -(a).imag : \ - ( float ) (a).imag ); \ - float temp = ( float ) (a).real * (a).real + ( float ) aimag * (a).imag; \ - float xr = ( float ) ( ( float ) (a).real * (x).real + ( float ) aimag * (x).imag ) / temp; \ - float xi = ( float ) ( ( float ) (a).real * (x).imag - ( float ) aimag * (x).real ) / temp; \ - (x).real = xr; \ - (x).imag = xi; \ -} - -#define bli_szinvscalcjs( conj, a, x ) \ -{ \ - (x).real /= ( double ) (a); \ - (x).imag /= ( double ) (a); \ -} -#define bli_dzinvscalcjs( conj, a, x ) \ -{ \ - (x).real /= ( double ) (a); \ - (x).imag /= ( double ) (a); \ -} -#define bli_czinvscalcjs( conj, a, x ) \ -{ \ - double aimag = ( bli_is_conj( conj ) ? ( double ) -(a).imag : \ - ( double ) (a).imag ); \ - double temp = ( double ) (a).real * (a).real + ( double ) aimag * (a).imag; \ - double xr = ( double ) ( ( double ) (a).real * (x).real + ( double ) aimag * (x).imag ) / temp; \ - double xi = ( double ) ( ( double ) (a).real * (x).imag - ( double ) aimag * (x).real ) / temp; \ - (x).real = xr; \ - (x).imag = xi; \ -} -#define bli_zzinvscalcjs( conj, a, x ) \ -{ \ - double aimag = ( bli_is_conj( conj ) ? 
( double ) -(a).imag : \ - ( double ) (a).imag ); \ - double temp = ( double ) (a).real * (a).real + ( double ) aimag * (a).imag; \ - double xr = ( double ) ( ( double ) (a).real * (x).real + ( double ) aimag * (x).imag ) / temp; \ - double xi = ( double ) ( ( double ) (a).real * (x).imag - ( double ) aimag * (x).real ) / temp; \ - (x).real = xr; \ - (x).imag = xi; \ -} - - -#define bli_sinvscalcjs( conj, a, x ) \ -{ \ - bli_ssinvscalcjs( conj, a, x ); \ -} -#define bli_dinvscalcjs( conj, a, x ) \ -{ \ - bli_ddinvscalcjs( conj, a, x ); \ -} -#define bli_cinvscalcjs( conj, a, x ) \ -{ \ - bli_ccinvscalcjs( conj, a, x ); \ -} -#define bli_zinvscalcjs( conj, a, x ) \ -{ \ - bli_zzinvscalcjs( conj, a, x ); \ -} - - -#endif diff --git a/frame/include/level0/old/bli_scalcjs.h b/frame/include/level0/old/bli_scalcjs.h deleted file mode 100644 index 3072ca7e29..0000000000 --- a/frame/include/level0/old/bli_scalcjs.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCALCJS_H -#define BLIS_SCALCJS_H - -// scalcjs - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - a is (conditionally) used in conjugated form. - -#define bli_ssscalcjs( conj, a, x ) \ -{ \ - (x) *= ( float ) (a); \ -} -#define bli_dsscalcjs( conj, a, x ) \ -{ \ - (x) *= ( float ) (a); \ -} -#define bli_csscalcjs( conj, a, x ) \ -{ \ - (x) *= ( float ) (a).real; \ -} -#define bli_zsscalcjs( conj, a, x ) \ -{ \ - (x) *= ( float ) (a).real; \ -} - -#define bli_sdscalcjs( conj, a, x ) \ -{ \ - (x) *= ( double ) (a); \ -} -#define bli_ddscalcjs( conj, a, x ) \ -{ \ - (x) *= ( double ) (a); \ -} -#define bli_cdscalcjs( conj, a, x ) \ -{ \ - (x) *= ( double ) (a).real; \ -} -#define bli_zdscalcjs( conj, a, x ) \ -{ \ - (x) *= ( double ) (a).real; \ -} - -#define bli_scscalcjs( conj, a, x ) \ -{ \ - (x).real *= ( float ) (a); \ - (x).imag *= ( float ) (a); \ -} -#define bli_dcscalcjs( conj, a, x ) \ -{ \ - (x).real *= ( float ) (a); \ - (x).imag *= ( float ) (a); \ -} -#define bli_ccscalcjs( conj, a, x ) \ -{ \ - float aimag = ( bli_is_conj( conj ) ? 
( float ) -(a).imag : \ - ( float ) (a).imag ); \ - float tempr = ( float ) (a).real * (x).real - ( float ) aimag * (x).imag; \ - float tempi = ( float ) (a).real * (x).imag + ( float ) aimag * (x).real; \ - (x).real = tempr; \ - (x).imag = tempi; \ -} -#define bli_zcscalcjs( conj, a, x ) \ -{ \ - float aimag = ( bli_is_conj( conj ) ? ( float ) -(a).imag : \ - ( float ) (a).imag ); \ - float tempr = ( float ) (a).real * (x).real - ( float ) aimag * (x).imag; \ - float tempi = ( float ) (a).real * (x).imag + ( float ) aimag * (x).real; \ - (x).real = tempr; \ - (x).imag = tempi; \ -} - -#define bli_szscalcjs( conj, a, x ) \ -{ \ - (x).real *= ( double ) (a); \ - (x).imag *= ( double ) (a); \ -} -#define bli_dzscalcjs( conj, a, x ) \ -{ \ - (x).real *= ( double ) (a); \ - (x).imag *= ( double ) (a); \ -} -#define bli_czscalcjs( conj, a, x ) \ -{ \ - double aimag = ( bli_is_conj( conj ) ? ( double ) -(a).imag : \ - ( double ) (a).imag ); \ - double tempr = ( double ) (a).real * (x).real - ( double ) aimag * (x).imag; \ - double tempi = ( double ) (a).real * (x).imag + ( double ) aimag * (x).real; \ - (x).real = tempr; \ - (x).imag = tempi; \ -} -#define bli_zzscalcjs( conj, a, x ) \ -{ \ - double aimag = ( bli_is_conj( conj ) ? 
( double ) -(a).imag : \ - ( double ) (a).imag ); \ - double tempr = ( double ) (a).real * (x).real - ( double ) aimag * (x).imag; \ - double tempi = ( double ) (a).real * (x).imag + ( double ) aimag * (x).real; \ - (x).real = tempr; \ - (x).imag = tempi; \ -} - - -#define bli_sscalcjs( conj, a, x ) \ -{ \ - bli_ssscalcjs( conj, a, x ); \ -} -#define bli_dscalcjs( conj, a, x ) \ -{ \ - bli_ddscalcjs( conj, a, x ); \ -} -#define bli_cscalcjs( conj, a, x ) \ -{ \ - bli_ccscalcjs( conj, a, x ); \ -} -#define bli_zscalcjs( conj, a, x ) \ -{ \ - bli_zzscalcjs( conj, a, x ); \ -} - - -#endif diff --git a/frame/include/level0/old/bli_set0ris_mxn.h b/frame/include/level0/old/bli_set0ris_mxn.h deleted file mode 100644 index 212ef4742b..0000000000 --- a/frame/include/level0/old/bli_set0ris_mxn.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET0RIS_MXN_H -#define BLIS_SET0RIS_MXN_H - -// set0ris_mxn - -#define bli_sset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - bli_sset0ris( *(ar + _i*rs_a + _j*cs_a), \ - *(ai + _i*rs_a + _j*cs_a) ); \ -} - -#define bli_dset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - bli_dset0ris( *(ar + _i*rs_a + _j*cs_a), \ - *(ai + _i*rs_a + _j*cs_a) ); \ -} - -#define bli_cset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - bli_cset0ris( *(ar + _i*rs_a + _j*cs_a), \ - *(ai + _i*rs_a + _j*cs_a) ); \ -} - -#define bli_zset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - bli_zset0ris( *(ar + _i*rs_a + _j*cs_a), \ - *(ai + _i*rs_a + _j*cs_a) ); \ -} - - -#endif diff --git a/frame/include/level0/old/io/bli_scal2ios.h b/frame/include/level0/old/io/bli_scal2ios.h deleted file mode 100644 index 6b24151711..0000000000 --- a/frame/include/level0/old/io/bli_scal2ios.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyiight - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyiight - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCAL2IOS_H -#define BLIS_SCAL2IOS_H - -// scal2ios - -#define bli_cscal2ios( a, x, yi ) \ -{ \ - (yi) = bli_cimag(a) * bli_creal(x) + bli_creal(a) * bli_cimag(x); \ -} - -#define bli_zscal2ios( a, x, yi ) \ -{ \ - (yi) = bli_zimag(a) * bli_zreal(x) + bli_zreal(a) * bli_zimag(x); \ -} - -#define bli_scscal2ios( a, x, yi ) \ -{ \ - (yi) = bli_creal(a) * bli_cimag(x); \ -} - -#define bli_dzscal2ios( a, x, yi ) \ -{ \ - (yi) = bli_zreal(a) * bli_zimag(x); \ -} - -#endif - diff --git a/frame/include/level0/old/io/bli_scal2jios.h b/frame/include/level0/old/io/bli_scal2jios.h deleted file mode 100644 index 941b6044c2..0000000000 --- a/frame/include/level0/old/io/bli_scal2jios.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyiight - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyiight - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2JIOS_H -#define BLIS_SCAL2JIOS_H - -// scal2jios - -#define bli_cscal2jios( a, x, yi ) \ -{ \ - (yi) = bli_cimag(a) * bli_creal(x) - bli_creal(a) * bli_cimag(x); \ -} - -#define bli_zscal2jios( a, x, yi ) \ -{ \ - (yi) = bli_zimag(a) * bli_zreal(x) - bli_zreal(a) * bli_zimag(x); \ -} - - -#endif - diff --git a/frame/include/level0/old/ri3/bli_copyjri3s.h b/frame/include/level0/old/ri3/bli_copyjri3s.h deleted file mode 100644 index 6be9e36191..0000000000 --- a/frame/include/level0/old/ri3/bli_copyjri3s.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYJRI3S_H -#define BLIS_COPYJRI3S_H - -// copyjri3s - -#define bli_scopyjri3s( ar, ai, br, bi, bri ) bli_scopyri3s( (ar), -(ai), (br), (bi), (bri) ) -#define bli_dcopyjri3s( ar, ai, br, bi, bri ) bli_dcopyri3s( (ar), -(ai), (br), (bi), (bri) ) -#define bli_ccopyjri3s( ar, ai, br, bi, bri ) bli_ccopyri3s( (ar), -(ai), (br), (bi), (bri) ) -#define bli_zcopyjri3s( ar, ai, br, bi, bri ) bli_zcopyri3s( (ar), -(ai), (br), (bi), (bri) ) - -#endif - diff --git a/frame/include/level0/old/ri3/bli_scal2ri3s_mxn.h b/frame/include/level0/old/ri3/bli_scal2ri3s_mxn.h deleted file mode 100644 index 2316f0738c..0000000000 --- a/frame/include/level0/old/ri3/bli_scal2ri3s_mxn.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCAL2RI3S_MXN_H -#define BLIS_SCAL2RI3S_MXN_H - -// scal2ri3s_mxn - -BLIS_INLINE void bli_cscal2ri3s_mxn - ( - const conj_t conjx, - const dim_t m, - const dim_t n, - scomplex* restrict alpha, - scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t is_y - ) -{ - float* restrict alpha_r = ( float* )alpha; \ - float* restrict alpha_i = ( float* )alpha + 1; \ - float* restrict x_r = ( float* )x; \ - float* restrict x_i = ( float* )x + 1; \ - float* restrict y_r = ( float* )y; \ - float* restrict y_i = ( float* )y + is_y; \ - float* restrict y_rpi = ( float* )y + 2*is_y; \ - const dim_t incx2 = 2*rs_x; \ - const dim_t ldx2 = 2*cs_x; \ - - /* Treat the micro-panel as panel_dim x panel_len and column-stored - (unit row stride). */ \ - - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - float* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; - float* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; - float* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; - float* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; - float* restrict psi11_rpi = y_rpi + (i )*1 + (j )*cs_y; - - bli_cscal2jri3s - ( - *alpha_r, - *alpha_i, - *chi11_r, - *chi11_i, - *psi11_r, - *psi11_i, - *psi11_rpi - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - float* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; - float* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; - float* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; - float* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; - float* restrict psi11_rpi = y_rpi + (i )*1 + (j )*cs_y; - - bli_cscal2ri3s - ( - *alpha_r, - *alpha_i, - *chi11_r, - *chi11_i, - *psi11_r, - *psi11_i, - *psi11_rpi - ); - } - } -} - -BLIS_INLINE void bli_zscal2ri3s_mxn - ( - const conj_t conjx, - const dim_t m, - const dim_t n, - dcomplex* restrict alpha, 
- dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t is_y - ) -{ - double* restrict alpha_r = ( double* )alpha; \ - double* restrict alpha_i = ( double* )alpha + 1; \ - double* restrict x_r = ( double* )x; \ - double* restrict x_i = ( double* )x + 1; \ - double* restrict y_r = ( double* )y; \ - double* restrict y_i = ( double* )y + is_y; \ - double* restrict y_rpi = ( double* )y + 2*is_y; \ - const dim_t incx2 = 2*rs_x; \ - const dim_t ldx2 = 2*cs_x; \ - - /* Treat the micro-panel as panel_dim x panel_len and column-stored - (unit row stride). */ \ - - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - double* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; - double* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; - double* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; - double* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; - double* restrict psi11_rpi = y_rpi + (i )*1 + (j )*cs_y; - - bli_zscal2jri3s - ( - *alpha_r, - *alpha_i, - *chi11_r, - *chi11_i, - *psi11_r, - *psi11_i, - *psi11_rpi - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - double* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; - double* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; - double* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; - double* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; - double* restrict psi11_rpi = y_rpi + (i )*1 + (j )*cs_y; - - bli_zscal2ri3s - ( - *alpha_r, - *alpha_i, - *chi11_r, - *chi11_i, - *psi11_r, - *psi11_i, - *psi11_rpi - ); - } - } -} - - -#endif diff --git a/frame/include/level0/old/rih/bli_scal2rihs_mxn.h b/frame/include/level0/old/rih/bli_scal2rihs_mxn.h deleted file mode 100644 index ca117b85d9..0000000000 --- a/frame/include/level0/old/rih/bli_scal2rihs_mxn.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - - BLIS - An object-based framework for 
developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCAL2RIHS_MXN_H -#define BLIS_SCAL2RIHS_MXN_H - -// scal2rihs_mxn - -BLIS_INLINE void bli_cscal2rihs_mxn - ( - const pack_t schema, - const conj_t conjx, - const dim_t m, - const dim_t n, - scomplex* restrict alpha, - scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - scomplex* restrict x_r = x; - float* restrict y_r = ( float* )y; - - if ( bli_is_ro_packed( schema ) ) - { - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_cscal2jros - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_cscal2ros - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - } - else if ( bli_is_io_packed( schema ) ) - { - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_cscal2jios - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_cscal2ios - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - } - else /* if ( bli_is_rpi_packed( schema ) ) */ - { - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_cscal2jrpis - ( 
- *alpha, - *chi11, - *psi11_r - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_cscal2rpis - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - } -} - -BLIS_INLINE void bli_zscal2rihs_mxn - ( - const pack_t schema, - const conj_t conjx, - const dim_t m, - const dim_t n, - dcomplex* restrict alpha, - dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y - ) -{ - dcomplex* restrict x_r = x; - double* restrict y_r = ( double* )y; - - if ( bli_is_ro_packed( schema ) ) - { - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_zscal2jros - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_zscal2ros - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - } - else if ( bli_is_io_packed( schema ) ) - { - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_zscal2jios - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_zscal2ios - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - } - else /* if ( 
bli_is_rpi_packed( schema ) ) */ - { - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_zscal2jrpis - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; - double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; - - bli_zscal2rpis - ( - *alpha, - *chi11, - *psi11_r - ); - } - } - } -} - - -#endif diff --git a/frame/include/level0/old/rih/bli_scal2rihs_mxn_diag.h b/frame/include/level0/old/rih/bli_scal2rihs_mxn_diag.h deleted file mode 100644 index 79897755e6..0000000000 --- a/frame/include/level0/old/rih/bli_scal2rihs_mxn_diag.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2RIHS_MXN_DIAG_H -#define BLIS_SCAL2RIHS_MXN_DIAG_H - -// scal2rihs_mxn_diag - -#define bli_cscscal2rihs_mxn_diag( schema, m, n, a, x, rs_x, cs_x, y_r, rs_y, cs_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t _i; \ -\ - /* Handle ro, io, and rpi separately. */ \ - if ( bli_is_ro_packed( schema ) ) \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_scscal2ros( *(x + _i*rs_x + _i*cs_x), \ - *(a), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ - else if ( bli_is_io_packed( schema ) ) \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_scscal2ios( *(x + _i*rs_x + _i*cs_x), \ - *(a), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_rpi_packed( schema ) ) */ \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_scscal2rpis( *(x + _i*rs_x + _i*cs_x), \ - *(a), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ -} - -#define bli_zdzscal2rihs_mxn_diag( schema, m, n, a, x, rs_x, cs_x, y_r, rs_y, cs_y ) \ -{ \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t _i; \ -\ - /* Handle ro, io, and rpi separately. 
*/ \ - if ( bli_is_ro_packed( schema ) ) \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_dzscal2ros( *(x + _i*rs_x + _i*cs_x), \ - *(a), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ - else if ( bli_is_io_packed( schema ) ) \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_dzscal2ios( *(x + _i*rs_x + _i*cs_x), \ - *(a), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_rpi_packed( schema ) ) */ \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_dzscal2rpis( *(x + _i*rs_x + _i*cs_x), \ - *(a), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/old/rih/bli_scal2rihs_mxn_uplo.h b/frame/include/level0/old/rih/bli_scal2rihs_mxn_uplo.h deleted file mode 100644 index 6c26fadd4c..0000000000 --- a/frame/include/level0/old/rih/bli_scal2rihs_mxn_uplo.h +++ /dev/null @@ -1,348 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2RIHS_MXN_UPLO_H -#define BLIS_SCAL2RIHS_MXN_UPLO_H - -// scal2rihs_mxn_uplo - -#define bli_cscal2rihs_mxn_uplo( schema, uplo, conjx, m, a, x, rs_x, cs_x, y_r, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* Handle ro, io, and rpi separately. */ \ - if ( bli_is_ro_packed( schema ) ) \ - { \ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_cscal2jros( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_cscal2ros( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_cscal2jros( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_cscal2ros( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - } \ - else if ( bli_is_io_packed( schema ) ) \ - { \ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( 
_i = _j; _i < m; ++_i ) \ - { \ - bli_cscal2jios( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_cscal2ios( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_cscal2jios( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_cscal2ios( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - } \ - else /* if ( bli_is_rpi_packed( schema ) ) */ \ - { \ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_cscal2jrpis( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_cscal2rpis( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_cscal2jrpis( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_cscal2rpis( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - } \ -} - -#define bli_zscal2rihs_mxn_uplo( schema, uplo, conjx, m, a, x, rs_x, 
cs_x, y_r, rs_y, cs_y ) \ -{ \ - dim_t _i, _j; \ -\ - /* Handle ro, io, and rpi separately. */ \ - if ( bli_is_ro_packed( schema ) ) \ - { \ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_zscal2jros( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_zscal2ros( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_zscal2jros( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_zscal2ros( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - } \ - else if ( bli_is_io_packed( schema ) ) \ - { \ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_zscal2jios( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_zscal2ios( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_zscal2jios( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < 
m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_zscal2ios( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - } \ - else /* if ( bli_is_rpi_packed( schema ) ) */ \ - { \ - if ( bli_is_lower( uplo ) ) \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_zscal2jrpis( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = _j; _i < m; ++_i ) \ - { \ - bli_zscal2rpis( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - else /* if ( bli_is_upper( uplo ) ) */ \ - { \ - if ( bli_is_conj( conjx ) ) \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_zscal2jrpis( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_noconj( conjx ) ) */ \ - { \ - for ( _j = 0; _j < m; ++_j ) \ - for ( _i = 0; _i < _j + 1; ++_i ) \ - { \ - bli_zscal2rpis( *(a), \ - *(x + _i*rs_x + _j*cs_x), \ - *(y_r + _i*rs_y + _j*cs_y) ); \ - } \ - } \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/old/rih/bli_setrihs_mxn_diag.h b/frame/include/level0/old/rih/bli_setrihs_mxn_diag.h deleted file mode 100644 index 33584deb86..0000000000 --- a/frame/include/level0/old/rih/bli_setrihs_mxn_diag.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SETRIHS_MXN_DIAG_H -#define BLIS_SETRIHS_MXN_DIAG_H - -// setrihs_mxn_diag - -#define bli_csetrihs_mxn_diag( schema, m, n, a, y_r, rs_y, cs_y ) \ -{ \ - const float a_r = bli_zreal( *a ); \ - const float a_i = bli_zimag( *a ); \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t _i; \ -\ - /* Handle ro, io, and rpi separately. 
*/ \ - if ( bli_is_ro_packed( schema ) ) \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_scopys( (a_r), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ - else if ( bli_is_io_packed( schema ) ) \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_scopys( (a_i), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_rpi_packed( schema ) ) */ \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_sadd3s( (a_r), \ - (a_i), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ -} - -#define bli_zsetrihs_mxn_diag( schema, m, n, a, y_r, rs_y, cs_y ) \ -{ \ - const double a_r = bli_zreal( *a ); \ - const double a_i = bli_zimag( *a ); \ - dim_t min_m_n = bli_min( m, n ); \ - dim_t _i; \ -\ - /* Handle ro, io, and rpi separately. */ \ - if ( bli_is_ro_packed( schema ) ) \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_dcopys( (a_r), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ - else if ( bli_is_io_packed( schema ) ) \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_dcopys( (a_i), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ - else /* if ( bli_is_rpi_packed( schema ) ) */ \ - { \ - for ( _i = 0; _i < min_m_n; ++_i ) \ - { \ - bli_dadd3s( (a_r), \ - (a_i), \ - *(y_r + _i*rs_y + _i*cs_y) ); \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/old/ro/bli_scal2jros.h b/frame/include/level0/old/ro/bli_scal2jros.h deleted file mode 100644 index be7b43fb05..0000000000 --- a/frame/include/level0/old/ro/bli_scal2jros.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2JROS_H -#define BLIS_SCAL2JROS_H - -// scal2jros - -#define bli_cscal2jros( a, x, yr ) \ -{ \ - (yr) = bli_creal(a) * bli_creal(x) + bli_cimag(a) * bli_cimag(x); \ -} - -#define bli_zscal2jros( a, x, yr ) \ -{ \ - (yr) = bli_zreal(a) * bli_zreal(x) + bli_zimag(a) * bli_zimag(x); \ -} - -#endif - diff --git a/frame/include/level0/old/ro/bli_scal2ros.h b/frame/include/level0/old/ro/bli_scal2ros.h deleted file mode 100644 index 5f68de5ab3..0000000000 --- a/frame/include/level0/old/ro/bli_scal2ros.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCAL2ROS_H -#define BLIS_SCAL2ROS_H - -// scal2ros - -#define bli_cscal2ros( a, x, yr ) \ -{ \ - (yr) = bli_creal(a) * bli_creal(x) - bli_cimag(a) * bli_cimag(x); \ -} - -#define bli_zscal2ros( a, x, yr ) \ -{ \ - (yr) = bli_zreal(a) * bli_zreal(x) - bli_zimag(a) * bli_zimag(x); \ -} - -#define bli_scscal2ros( a, x, yr ) \ -{ \ - (yr) = bli_creal(a) * bli_creal(x); \ -} - -#define bli_dzscal2ros( a, x, yr ) \ -{ \ - (yr) = bli_zreal(a) * bli_zreal(x); \ -} - - -#endif - diff --git a/frame/include/level0/old/rpi/bli_scal2jrpis.h b/frame/include/level0/old/rpi/bli_scal2jrpis.h deleted file mode 100644 index 718baa425c..0000000000 --- a/frame/include/level0/old/rpi/bli_scal2jrpis.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyrpiight - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyrpiight - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2JRPIS_H -#define BLIS_SCAL2JRPIS_H - -// scal2jrpis - -#define bli_cscal2jrpis( a, x, yrpi ) \ -{ \ - (yrpi) = (bli_creal(a)+bli_cimag(a)) * bli_creal(x) + \ - (bli_cimag(a)-bli_creal(a)) * bli_cimag(x); \ -} - -#define bli_zscal2jrpis( a, x, yrpi ) \ -{ \ - (yrpi) = (bli_zreal(a)+bli_zimag(a)) * bli_zreal(x) + \ - (bli_zimag(a)-bli_zreal(a)) * bli_zimag(x); \ -} - -#endif - diff --git a/frame/include/level0/old/rpi/bli_scal2rpis.h b/frame/include/level0/old/rpi/bli_scal2rpis.h deleted file mode 100644 index 159cdc3352..0000000000 --- a/frame/include/level0/old/rpi/bli_scal2rpis.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCAL2RPIS_H -#define BLIS_SCAL2RPIS_H - -// scal2rpis - -#define bli_cscal2rpis( a, x, yrpi ) \ -{ \ - (yrpi) = (bli_creal(a)+bli_cimag(a)) * bli_creal(x) + \ - (bli_creal(a)-bli_cimag(a)) * bli_cimag(x); \ -} - -#define bli_zscal2rpis( a, x, yrpi ) \ -{ \ - (yrpi) = (bli_zreal(a)+bli_zimag(a)) * bli_zreal(x) + \ - (bli_zreal(a)-bli_zimag(a)) * bli_zimag(x); \ -} - -#define bli_scscal2rpis( a, x, yrpi ) \ -{ \ - (yrpi) = bli_creal(a) * bli_creal(x) + \ - bli_creal(a) * bli_cimag(x); \ -} - -#define bli_dzscal2rpis( a, x, yrpi ) \ -{ \ - (yrpi) = bli_zreal(a) * bli_zreal(x) + \ - bli_zreal(a) * bli_zimag(x); \ -} - - -#endif - diff --git a/frame/include/level0/ri/bli_absq2ris.h b/frame/include/level0/ri/bli_absq2ris.h deleted file mode 100644 index 6698a51a1b..0000000000 --- a/frame/include/level0/ri/bli_absq2ris.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_ABSQ2RIS_H -#define BLIS_ABSQ2RIS_H - -// absq2ris - -#define bli_sabsq2ris( ar, ai, br, bi ) \ -{ \ - (br) = (ar) * (ar); \ -} - -#define bli_dabsq2ris( ar, ai, br, bi ) \ -{ \ - (br) = (ar) * (ar); \ -} - -#define bli_cabsq2ris( ar, ai, br, bi ) \ -{ \ - (br) = (ar) * (ar) + (ai) * (ai); \ - (bi) = 0.0F; \ -} - -#define bli_zabsq2ris( ar, ai, br, bi ) \ -{ \ - (br) = (ar) * (ar) + (ai) * (ai); \ - (bi) = 0.0; \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_abval2ris.h b/frame/include/level0/ri/bli_abval2ris.h deleted file mode 100644 index fc0ca2c3e2..0000000000 --- a/frame/include/level0/ri/bli_abval2ris.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ABVAL2RIS_H -#define BLIS_ABVAL2RIS_H - -// abval2ris - -#define bli_sabval2ris( xr, xi, ar, ai ) \ -{ \ - (ar) = fabsf(xr); \ -} - -#define bli_dabval2ris( xr, xi, ar, ai ) \ -{ \ - (ar) = fabs(xr); \ -} - -#define bli_cabval2ris( xr, xi, ar, ai ) \ -{ \ - float s = bli_fmaxabs( (xr), (xi) ); \ - float mag; \ - if ( s == 0.0F ) mag = 0.0F; \ - else \ - { \ - mag = sqrtf( s ) * \ - sqrtf( ( (xr) / s ) * (xr) + \ - ( (xi) / s ) * (xi) ); \ - } \ - (ar) = mag; \ - (ai) = 0.0F; \ -} - -#define bli_zabval2ris( xr, xi, ar, ai ) \ -{ \ - double s = bli_fmaxabs( (xr), (xi) ); \ - double mag; \ - if ( s == 0.0 ) mag = 0.0; \ - else \ - { \ - mag = sqrt( s ) * \ - sqrt( ( (xr) / s ) * (xr) + \ - ( (xi) / s ) * (xi) ); \ - } \ - (ar) = mag; \ - (ai) = 0.0; \ -} - -#endif diff --git a/frame/include/level0/ri/bli_add3ris.h b/frame/include/level0/ri/bli_add3ris.h deleted file mode 100644 index 8c686568c2..0000000000 --- a/frame/include/level0/ri/bli_add3ris.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ADD3RIS_H -#define BLIS_ADD3RIS_H - -// add3ris - -#define bli_sadd3ris( ar, ai, br, bi, cr, ci ) \ -{ \ - (cr) = (ar) + (br); \ -} - -#define bli_dadd3ris( ar, ai, br, bi, cr, ci ) \ -{ \ - (cr) = (ar) + (br); \ -} - -#define bli_cadd3ris( ar, ai, br, bi, cr, ci ) \ -{ \ - (cr) = (ar) + (br); \ - (ci) = (ai) + (bi); \ -} - -#define bli_zadd3ris( ar, ai, br, bi, cr, ci ) \ -{ \ - (cr) = (ar) + (br); \ - (ci) = (ai) + (bi); \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_addjris.h b/frame/include/level0/ri/bli_addjris.h deleted file mode 100644 index df1802744e..0000000000 --- a/frame/include/level0/ri/bli_addjris.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_ADDJRIS_H -#define BLIS_ADDJRIS_H - -// addjris - -#define bli_saddjris( ar, ai, xr, xi ) bli_saddris( (ar), -(ai), (xr), (xi) ) -#define bli_daddjris( ar, ai, xr, xi ) bli_daddris( (ar), -(ai), (xr), (xi) ) -#define bli_caddjris( ar, ai, xr, xi ) bli_caddris( (ar), -(ai), (xr), (xi) ) -#define bli_zaddjris( ar, ai, xr, xi ) bli_zaddris( (ar), -(ai), (xr), (xi) ) - -#endif - diff --git a/frame/include/level0/ri/bli_addris.h b/frame/include/level0/ri/bli_addris.h deleted file mode 100644 index 2efadd36f4..0000000000 --- a/frame/include/level0/ri/bli_addris.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_ADDRIS_H -#define BLIS_ADDRIS_H - -// addris - -#define bli_saddris( ar, ai, xr, xi ) \ -{ \ - (xr) = (xr) + (ar); \ -} - -#define bli_daddris( ar, ai, xr, xi ) \ -{ \ - (xr) = (xr) + (ar); \ -} - -#define bli_caddris( ar, ai, xr, xi ) \ -{ \ - (xr) = (xr) + (ar); \ - (xi) = (xi) + (ai); \ -} - -#define bli_zaddris( ar, ai, xr, xi ) \ -{ \ - (xr) = (xr) + (ar); \ - (xi) = (xi) + (ai); \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_axmyris.h b/frame/include/level0/ri/bli_axmyris.h deleted file mode 100644 index f8d8992370..0000000000 --- a/frame/include/level0/ri/bli_axmyris.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXMYRIS_H -#define BLIS_AXMYRIS_H - -// axmyris - -#define bli_saxmyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) -= (ar) * (xr); \ -} - -#define bli_daxmyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) -= (ar) * (xr); \ -} - -#define bli_caxmyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) -= (ar) * (xr) - (ai) * (xi); \ - (yi) -= (ai) * (xr) + (ar) * (xi); \ -} - -#define bli_zaxmyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) -= (ar) * (xr) - (ai) * (xi); \ - (yi) -= (ai) * (xr) + (ar) * (xi); \ -} - -#define bli_scaxmyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) -= (ar) * (xr); \ - (yi) -= (ar) * (xi); \ -} - -#define bli_dzaxmyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) -= (ar) * (xr); \ - (yi) -= (ar) * (xi); \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_axpbyjris.h b/frame/include/level0/ri/bli_axpbyjris.h deleted file mode 100644 index 8dc2a55978..0000000000 --- a/frame/include/level0/ri/bli_axpbyjris.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - - BLIS - An object-based framework 
for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_AXPBYJRIS_H -#define BLIS_AXPBYJRIS_H - -// axpbyjris - -#define bli_rxaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr) + (br) * (yr); \ -} - -#define bli_cxaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ -{ \ - const __typeof__(yr) yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ - const __typeof__(yi) yt_i = (ai) * (xr) - (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ - (yr) = yt_r; \ - (yi) = yt_i; \ -} - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of b. -// - The fourth char encodes the type of y. - -// -- (axby) = (??ss) ---------------------------------------------------------- - -#define bli_ssssxpbyjris bli_rxxpbyjris -#define bli_dsssxpbyjris bli_rxxpbyjris -#define bli_csssxpbyjris bli_rxxpbyjris -#define bli_zsssxpbyjris bli_rxxpbyjris - -#define bli_sdssxpbyjris bli_rxxpbyjris -#define bli_ddssxpbyjris bli_rxxpbyjris -#define bli_cdssxpbyjris bli_rxxpbyjris -#define bli_zdssxpbyjris bli_rxxpbyjris - -#define bli_scssxpbyjris bli_rxxpbyjris -#define bli_dcssxpbyjris bli_rxxpbyjris -#define bli_ccssxpbyjris bli_rxxpbyjris -#define bli_zcssxpbyjris bli_rxxpbyjris - -#define bli_szssxpbyjris bli_rxxpbyjris -#define bli_dzssxpbyjris bli_rxxpbyjris -#define bli_czssxpbyjris bli_rxxpbyjris -#define bli_zzssxpbyjris bli_rxxpbyjris - -// NOTE: This series needs to be finished for all other char values for (by), but -// not until something in BLIS actually needs mixed-datatype axpbyjris. 
- - -#define bli_saxpbyjris bli_ssssaxpbyjris -#define bli_daxpbyjris bli_ddddaxpbyjris -#define bli_caxpbyjris bli_ccccaxpbyjris -#define bli_zaxpbyjris bli_zzzzaxpbyjris - -#endif - diff --git a/frame/include/level0/ri/bli_axpbyris.h b/frame/include/level0/ri/bli_axpbyris.h deleted file mode 100644 index 1344749e06..0000000000 --- a/frame/include/level0/ri/bli_axpbyris.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXPBYRIS_H -#define BLIS_AXPBYRIS_H - -// axpbyris - -#define bli_rxaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr) + (br) * (yr); \ -} - -#define bli_cxaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ -{ \ - const __typeof__(yr) yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ - const __typeof__(yi) yt_i = (ai) * (xr) + (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ - (yr) = yt_r; \ - (yi) = yt_i; \ -} - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of b. -// - The fourth char encodes the type of y. 
- -// -- (axby) = (??ss) ---------------------------------------------------------- - -#define bli_ssssxpbyris bli_rxxpbyris -#define bli_dsssxpbyris bli_rxxpbyris -#define bli_csssxpbyris bli_rxxpbyris -#define bli_zsssxpbyris bli_rxxpbyris - -#define bli_sdssxpbyris bli_rxxpbyris -#define bli_ddssxpbyris bli_rxxpbyris -#define bli_cdssxpbyris bli_rxxpbyris -#define bli_zdssxpbyris bli_rxxpbyris - -#define bli_scssxpbyris bli_rxxpbyris -#define bli_dcssxpbyris bli_rxxpbyris -#define bli_ccssxpbyris bli_rxxpbyris -#define bli_zcssxpbyris bli_rxxpbyris - -#define bli_szssxpbyris bli_rxxpbyris -#define bli_dzssxpbyris bli_rxxpbyris -#define bli_czssxpbyris bli_rxxpbyris -#define bli_zzssxpbyris bli_rxxpbyris - -// NOTE: This series needs to be finished for all other char values for (by), but -// not until something in BLIS actually needs mixed-datatype axpbyris. - - -#define bli_saxpbyris bli_ssssaxpbyris -#define bli_daxpbyris bli_ddddaxpbyris -#define bli_caxpbyris bli_ccccaxpbyris -#define bli_zaxpbyris bli_zzzzaxpbyris - -#endif - diff --git a/frame/include/level0/ri/bli_axpyjris.h b/frame/include/level0/ri/bli_axpyjris.h deleted file mode 100644 index 4525591b42..0000000000 --- a/frame/include/level0/ri/bli_axpyjris.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXPYJRIS_H -#define BLIS_AXPYJRIS_H - -// axpyjris - -#define bli_rxaxpyjris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr); \ -} - -#define bli_cxaxpyjris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr) + (ai) * (xi); \ - (yi) += (ai) * (xr) - (ar) * (xi); \ -} - -#define bli_roaxpyjris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr) + (ai) * (xi); \ -} - -#define bli_craxpyjris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr); \ - (yi) += (ar) * -(xi); \ -} - -#define bli_rcaxpyjris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr); \ - (yi) += (ai) * (xr); \ -} - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. 
- -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssaxpyjris bli_rxaxpyjris -#define bli_dssaxpyjris bli_rxaxpyjris -#define bli_cssaxpyjris bli_rxaxpyjris -#define bli_zssaxpyjris bli_rxaxpyjris - -#define bli_sdsaxpyjris bli_rxaxpyjris -#define bli_ddsaxpyjris bli_rxaxpyjris -#define bli_cdsaxpyjris bli_rxaxpyjris -#define bli_zdsaxpyjris bli_rxaxpyjris - -#define bli_scsaxpyjris bli_rxaxpyjris -#define bli_dcsaxpyjris bli_rxaxpyjris -#define bli_ccsaxpyjris bli_roaxpyjris -#define bli_zcsaxpyjris bli_roaxpyjris - -#define bli_szsaxpyjris bli_rxaxpyjris -#define bli_dzsaxpyjris bli_rxaxpyjris -#define bli_czsaxpyjris bli_roaxpyjris -#define bli_zzsaxpyjris bli_roaxpyjris - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdaxpyjris bli_rxaxpyjris -#define bli_dsdaxpyjris bli_rxaxpyjris -#define bli_csdaxpyjris bli_rxaxpyjris -#define bli_zsdaxpyjris bli_rxaxpyjris - -#define bli_sddaxpyjris bli_rxaxpyjris -#define bli_dddaxpyjris bli_rxaxpyjris -#define bli_cddaxpyjris bli_rxaxpyjris -#define bli_zddaxpyjris bli_rxaxpyjris - -#define bli_scdaxpyjris bli_rxaxpyjris -#define bli_dcdaxpyjris bli_rxaxpyjris -#define bli_ccdaxpyjris bli_roaxpyjris -#define bli_zcdaxpyjris bli_roaxpyjris - -#define bli_szdaxpyjris bli_rxaxpyjris -#define bli_dzdaxpyjris bli_rxaxpyjris -#define bli_czdaxpyjris bli_roaxpyjris -#define bli_zzdaxpyjris bli_roaxpyjris - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscaxpyjris bli_rxaxpyjris -#define bli_dscaxpyjris bli_rxaxpyjris -#define bli_cscaxpyjris bli_rcaxpyjris -#define bli_zscaxpyjris bli_rcaxpyjris - -#define bli_sdcaxpyjris bli_rxaxpyjris -#define bli_ddcaxpyjris bli_rxaxpyjris -#define bli_cdcaxpyjris bli_rcaxpyjris -#define bli_zdcaxpyjris bli_rcaxpyjris - -#define bli_sccaxpyjris bli_craxpyjris -#define bli_dccaxpyjris bli_craxpyjris -#define bli_cccaxpyjris bli_cxaxpyjris 
-#define bli_zccaxpyjris bli_cxaxpyjris - -#define bli_szcaxpyjris bli_craxpyjris -#define bli_dzcaxpyjris bli_craxpyjris -#define bli_czcaxpyjris bli_cxaxpyjris -#define bli_zzcaxpyjris bli_cxaxpyjris - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszaxpyjris bli_rxaxpyjris -#define bli_dszaxpyjris bli_rxaxpyjris -#define bli_cszaxpyjris bli_rcaxpyjris -#define bli_zszaxpyjris bli_rcaxpyjris - -#define bli_sdzaxpyjris bli_rxaxpyjris -#define bli_ddzaxpyjris bli_rxaxpyjris -#define bli_cdzaxpyjris bli_rcaxpyjris -#define bli_zdzaxpyjris bli_rcaxpyjris - -#define bli_sczaxpyjris bli_craxpyjris -#define bli_dczaxpyjris bli_craxpyjris -#define bli_cczaxpyjris bli_cxaxpyjris -#define bli_zczaxpyjris bli_cxaxpyjris - -#define bli_szzaxpyjris bli_craxpyjris -#define bli_dzzaxpyjris bli_craxpyjris -#define bli_czzaxpyjris bli_cxaxpyjris -#define bli_zzzaxpyjris bli_cxaxpyjris - - - -#define bli_saxpyjris bli_sssaxpyjris -#define bli_daxpyjris bli_dddaxpyjris -#define bli_caxpyjris bli_cccaxpyjris -#define bli_zaxpyjris bli_zzzaxpyjris - -#endif - diff --git a/frame/include/level0/ri/bli_axpyris.h b/frame/include/level0/ri/bli_axpyris.h deleted file mode 100644 index 515e5e7903..0000000000 --- a/frame/include/level0/ri/bli_axpyris.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_AXPYRIS_H -#define BLIS_AXPYRIS_H - -// axpyris - -#define bli_rxaxpyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr); \ -} - -#define bli_cxaxpyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr) - (ai) * (xi); \ - (yi) += (ai) * (xr) + (ar) * (xi); \ -} - -#define bli_roaxpyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr) - (ai) * (xi); \ -} - -#define bli_craxpyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr); \ - (yi) += (ar) * (xi); \ -} - -#define bli_rcaxpyris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) += (ar) * (xr); \ - (yi) += (ai) * (xr); \ -} - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. 
- -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssaxpyris bli_rxaxpyris -#define bli_dssaxpyris bli_rxaxpyris -#define bli_cssaxpyris bli_rxaxpyris -#define bli_zssaxpyris bli_rxaxpyris - -#define bli_sdsaxpyris bli_rxaxpyris -#define bli_ddsaxpyris bli_rxaxpyris -#define bli_cdsaxpyris bli_rxaxpyris -#define bli_zdsaxpyris bli_rxaxpyris - -#define bli_scsaxpyris bli_rxaxpyris -#define bli_dcsaxpyris bli_rxaxpyris -#define bli_ccsaxpyris bli_roaxpyris -#define bli_zcsaxpyris bli_roaxpyris - -#define bli_szsaxpyris bli_rxaxpyris -#define bli_dzsaxpyris bli_rxaxpyris -#define bli_czsaxpyris bli_roaxpyris -#define bli_zzsaxpyris bli_roaxpyris - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdaxpyris bli_rxaxpyris -#define bli_dsdaxpyris bli_rxaxpyris -#define bli_csdaxpyris bli_rxaxpyris -#define bli_zsdaxpyris bli_rxaxpyris - -#define bli_sddaxpyris bli_rxaxpyris -#define bli_dddaxpyris bli_rxaxpyris -#define bli_cddaxpyris bli_rxaxpyris -#define bli_zddaxpyris bli_rxaxpyris - -#define bli_scdaxpyris bli_rxaxpyris -#define bli_dcdaxpyris bli_rxaxpyris -#define bli_ccdaxpyris bli_roaxpyris -#define bli_zcdaxpyris bli_roaxpyris - -#define bli_szdaxpyris bli_rxaxpyris -#define bli_dzdaxpyris bli_rxaxpyris -#define bli_czdaxpyris bli_roaxpyris -#define bli_zzdaxpyris bli_roaxpyris - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscaxpyris bli_rxaxpyris -#define bli_dscaxpyris bli_rxaxpyris -#define bli_cscaxpyris bli_rcaxpyris -#define bli_zscaxpyris bli_rcaxpyris - -#define bli_sdcaxpyris bli_rxaxpyris -#define bli_ddcaxpyris bli_rxaxpyris -#define bli_cdcaxpyris bli_rcaxpyris -#define bli_zdcaxpyris bli_rcaxpyris - -#define bli_sccaxpyris bli_craxpyris -#define bli_dccaxpyris bli_craxpyris -#define bli_cccaxpyris bli_cxaxpyris -#define bli_zccaxpyris bli_cxaxpyris - -#define bli_szcaxpyris bli_craxpyris -#define 
bli_dzcaxpyris bli_craxpyris -#define bli_czcaxpyris bli_cxaxpyris -#define bli_zzcaxpyris bli_cxaxpyris - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszaxpyris bli_rxaxpyris -#define bli_dszaxpyris bli_rxaxpyris -#define bli_cszaxpyris bli_rcaxpyris -#define bli_zszaxpyris bli_rcaxpyris - -#define bli_sdzaxpyris bli_rxaxpyris -#define bli_ddzaxpyris bli_rxaxpyris -#define bli_cdzaxpyris bli_rcaxpyris -#define bli_zdzaxpyris bli_rcaxpyris - -#define bli_sczaxpyris bli_craxpyris -#define bli_dczaxpyris bli_craxpyris -#define bli_cczaxpyris bli_cxaxpyris -#define bli_zczaxpyris bli_cxaxpyris - -#define bli_szzaxpyris bli_craxpyris -#define bli_dzzaxpyris bli_craxpyris -#define bli_czzaxpyris bli_cxaxpyris -#define bli_zzzaxpyris bli_cxaxpyris - - - -#define bli_saxpyris bli_sssaxpyris -#define bli_daxpyris bli_dddaxpyris -#define bli_caxpyris bli_cccaxpyris -#define bli_zaxpyris bli_zzzaxpyris - -#endif - diff --git a/frame/include/level0/ri/bli_conjris.h b/frame/include/level0/ri/bli_conjris.h deleted file mode 100644 index c4a917011e..0000000000 --- a/frame/include/level0/ri/bli_conjris.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONJRIS_H -#define BLIS_CONJRIS_H - -// conjris - -#define bli_sconjris( xr, xi ) \ -{ \ - ; \ -} - -#define bli_dconjris( xr, xi ) \ -{ \ - ; \ -} - -#define bli_cconjris( xr, xi ) \ -{ \ - (xi) = -(xi); \ -} - -#define bli_zconjris( xr, xi ) \ -{ \ - (xi) = -(xi); \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_copycjris.h b/frame/include/level0/ri/bli_copycjris.h deleted file mode 100644 index c832323701..0000000000 --- a/frame/include/level0/ri/bli_copycjris.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_COPYCJRIS_H -#define BLIS_COPYCJRIS_H - -// copycjris - -#define bli_scopycjris( conj, xr, xi, yr, yi ) \ -{ \ - bli_scopyris( (xr), (xi), (yr), (yi) ); \ -} - -#define bli_dcopycjris( conj, xr, xi, yr, yi ) \ -{ \ - bli_dcopyris( (xr), (xi), (yr), (yi) ); \ -} - -#define bli_ccopycjris( conj, xr, xi, yr, yi ) \ -{ \ - (yr) = (xr); \ - (yi) = ( bli_is_conj( conj ) ? -(xi) \ - : (xi) ); \ -} - -#define bli_zcopycjris( conj, xr, xi, yr, yi ) \ -{ \ - (yr) = (xr); \ - (yi) = ( bli_is_conj( conj ) ? 
-(xi) \ - : (xi) ); \ -} - -#define bli_icopycjris( conj, xr, xi, yr, yi ) \ -{ \ - bli_icopyris( (xr), (xi), (yr), (yi) ); \ -} - -#endif diff --git a/frame/include/level0/ri/bli_copyjris.h b/frame/include/level0/ri/bli_copyjris.h deleted file mode 100644 index 86fd705423..0000000000 --- a/frame/include/level0/ri/bli_copyjris.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_COPYJRIS_H -#define BLIS_COPYJRIS_H - -// copyjris - -#define bli_scopyjris( ar, ai, br, bi ) bli_scopyris( (ar), -(ai), (br), (bi) ) -#define bli_dcopyjris( ar, ai, br, bi ) bli_dcopyris( (ar), -(ai), (br), (bi) ) -#define bli_ccopyjris( ar, ai, br, bi ) bli_ccopyris( (ar), -(ai), (br), (bi) ) -#define bli_zcopyjris( ar, ai, br, bi ) bli_zcopyris( (ar), -(ai), (br), (bi) ) - -#define bli_sscopyjris( ar, ai, br, bi ) bli_scopyjris( ar, 0.0F, br, bi ) -#define bli_dscopyjris( ar, ai, br, bi ) bli_scopyjris( ar, 0.0, br, bi ) -#define bli_cscopyjris( ar, ai, br, bi ) bli_scopyjris( ar, ai, br, bi ) -#define bli_zscopyjris( ar, ai, br, bi ) bli_scopyjris( ar, ai, br, bi ) - -#define bli_sdcopyjris( ar, ai, br, bi ) bli_dcopyjris( ar, 0.0F, br, bi ) -#define bli_ddcopyjris( ar, ai, br, bi ) bli_dcopyjris( ar, 0.0, br, bi ) -#define bli_cdcopyjris( ar, ai, br, bi ) bli_dcopyjris( ar, ai, br, bi ) -#define bli_zdcopyjris( ar, ai, br, bi ) bli_dcopyjris( ar, ai, br, bi ) - -#define bli_sccopyjris( ar, ai, br, bi ) bli_ccopyjris( ar, 0.0F, br, bi ) -#define bli_dccopyjris( ar, ai, br, bi ) bli_ccopyjris( ar, 0.0, br, bi ) -#define bli_cccopyjris( ar, ai, br, bi ) bli_ccopyjris( ar, ai, br, bi ) -#define bli_zccopyjris( ar, ai, br, bi ) bli_ccopyjris( ar, ai, br, bi ) - -#define bli_szcopyjris( ar, ai, br, bi ) bli_zcopyjris( ar, 0.0F, br, bi ) -#define bli_dzcopyjris( ar, ai, br, bi ) bli_zcopyjris( ar, 0.0, br, bi ) -#define bli_czcopyjris( ar, ai, br, bi ) bli_zcopyjris( ar, ai, br, bi ) -#define bli_zzcopyjris( ar, ai, br, bi ) bli_zcopyjris( ar, ai, br, bi ) - -#endif - diff --git a/frame/include/level0/ri/bli_copyris.h b/frame/include/level0/ri/bli_copyris.h deleted file mode 100644 index cd971587de..0000000000 --- a/frame/include/level0/ri/bli_copyris.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_COPYRIS_H -#define BLIS_COPYRIS_H - -// copyris - -#define bli_scopyris( ar, ai, br, bi ) \ -{ \ - (br) = (ar); \ - ( void )ai; ( void )bi; \ -} - -#define bli_dcopyris( ar, ai, br, bi ) \ -{ \ - (br) = (ar); \ - ( void )ai; ( void )bi; \ -} - -#define bli_ccopyris( ar, ai, br, bi ) \ -{ \ - (br) = (ar); \ - (bi) = (ai); \ -} - -#define bli_zcopyris( ar, ai, br, bi ) \ -{ \ - (br) = (ar); \ - (bi) = (ai); \ -} - -#define bli_sscopyris( ar, ai, br, bi ) { bli_scopyris( ar, 0.0F, br, bi ); ( void )ai; } -#define bli_dscopyris( ar, ai, br, bi ) { bli_scopyris( ar, 0.0, br, bi ); ( void )ai; } -#define bli_cscopyris( ar, ai, br, bi ) bli_scopyris( ar, ai, br, bi ) -#define bli_zscopyris( ar, ai, br, bi ) bli_scopyris( ar, ai, br, bi ) - -#define bli_sdcopyris( ar, ai, br, bi ) { bli_dcopyris( ar, 0.0F, br, bi ); ( void )ai; } -#define bli_ddcopyris( ar, ai, br, bi ) { bli_dcopyris( ar, 0.0, br, bi ); ( void )ai; } -#define bli_cdcopyris( ar, ai, br, bi ) bli_dcopyris( ar, ai, br, bi ) -#define bli_zdcopyris( ar, ai, br, bi ) bli_dcopyris( ar, ai, br, bi ) - -#define bli_sccopyris( ar, ai, br, bi ) { bli_ccopyris( ar, 0.0F, br, bi ); ( void )ai; } -#define bli_dccopyris( ar, ai, br, bi ) { bli_ccopyris( ar, 0.0, br, bi ); ( void )ai; } -#define bli_cccopyris( ar, ai, br, bi ) bli_ccopyris( ar, ai, br, bi ) -#define bli_zccopyris( ar, ai, br, bi ) bli_ccopyris( ar, ai, br, bi ) - -#define bli_szcopyris( ar, ai, br, bi ) { bli_zcopyris( ar, 0.0F, br, bi ); ( void )ai; } -#define bli_dzcopyris( ar, ai, br, bi ) { bli_zcopyris( ar, 0.0, br, bi ); ( void )ai; } -#define bli_czcopyris( ar, ai, br, bi ) bli_zcopyris( ar, ai, br, bi ) -#define bli_zzcopyris( ar, ai, br, bi ) bli_zcopyris( ar, ai, br, bi ) - -#endif diff --git a/frame/include/level0/ri/bli_eqris.h b/frame/include/level0/ri/bli_eqris.h deleted file mode 100644 index 4f84583169..0000000000 --- a/frame/include/level0/ri/bli_eqris.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - - BLIS - An object-based 
framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_EQRIS_H -#define BLIS_EQRIS_H - - -// eqris (passed by value) - -#define bli_seqris( ar, ai, br, bi ) ( (ar) == (br) ) -#define bli_deqris( ar, ai, br, bi ) ( (ar) == (br) ) -#define bli_ceqris( ar, ai, br, bi ) ( (ar) == (br) && (ai) == (bi) ) -#define bli_zeqris( ar, ai, br, bi ) ( (ar) == (br) && (ai) == (bi) ) -#define bli_ieqris( ar, ai, br, bi ) ( (ar) == (br) ) - - -// eq1ris - -#define bli_seq1ris( ar, ai ) bli_seqris( (ar), (ai), 1.0F, 0.0F ) -#define bli_deq1ris( ar, ai ) bli_deqris( (ar), (ai), 1.0, 0.0 ) -#define bli_ceq1ris( ar, ai ) bli_ceqris( (ar), (ai), 1.0F, 0.0F ) -#define bli_zeq1ris( ar, ai ) bli_zeqris( (ar), (ai), 1.0, 0.0 ) -#define bli_ieq1ris( ar, ai ) bli_ieqris( (ar), (ai), 1, 0 ) - - -// eq0ris - -#define bli_seq0ris( ar, ai ) bli_seqris( (ar), (ai), 0.0F, 0.0F ) -#define bli_deq0ris( ar, ai ) bli_deqris( (ar), (ai), 0.0, 0.0 ) -#define bli_ceq0ris( ar, ai ) bli_ceqris( (ar), (ai), 0.0F, 0.0F ) -#define bli_zeq0ris( ar, ai ) bli_zeqris( (ar), (ai), 0.0, 0.0 ) -#define bli_ieq0ris( ar, ai ) bli_ieqris( (ar), (ai), 0, 0 ) - - -// eqm1ris - -#define bli_seqm1ris( ar, ai ) bli_seqris( (ar), (ai), -1.0F, 0.0F ) -#define bli_deqm1ris( ar, ai ) bli_deqris( (ar), (ai), -1.0, 0.0 ) -#define bli_ceqm1ris( ar, ai ) bli_ceqris( (ar), (ai), -1.0F, 0.0F ) -#define bli_zeqm1ris( ar, ai ) bli_zeqris( (ar), (ai), -1.0, 0.0 ) -#define bli_ieqm1ris( ar, ai ) bli_ieqris( (ar), (ai), -1, 0 ) - - - -#endif diff --git a/frame/include/level0/ri/bli_invertris.h b/frame/include/level0/ri/bli_invertris.h deleted file mode 100644 index aacf40ee3d..0000000000 --- a/frame/include/level0/ri/bli_invertris.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_INVERTRIS_H -#define BLIS_INVERTRIS_H - -// invertris - -#define bli_sinvertris( xr, xi ) \ -{ \ - (xr) = 1.0F / (xr); \ -} - -#define bli_dinvertris( xr, xi ) \ -{ \ - (xr) = 1.0 / (xr); \ -} - -#define bli_cinvertris( xr, xi ) \ -{ \ - float s = bli_fmaxabs( (xr), (xi) ); \ - float xr_s = (xr) / s; \ - float xi_s = (xi) / s; \ - float temp = ( xr_s * (xr) + xi_s * (xi) ); \ - (xr) = xr_s / temp; \ - (xi) = -xi_s / temp; \ -} - -#define bli_zinvertris( xr, xi ) \ -{ \ - double s = bli_fmaxabs( (xr), (xi) ); \ - double xr_s = (xr) / s; \ - double xi_s = (xi) / s; \ - double temp = ( xr_s * (xr) + xi_s * (xi) ); \ - (xr) = xr_s / temp; \ - (xi) = -xi_s / temp; \ -} - -#endif diff --git a/frame/include/level0/ri/bli_invscaljris.h b/frame/include/level0/ri/bli_invscaljris.h deleted file mode 100644 index 43d98cd780..0000000000 --- a/frame/include/level0/ri/bli_invscaljris.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_INVSCALJRIS_H -#define BLIS_INVSCALJRIS_H - -// invscaljris - -#define bli_sinvscaljris( ar, ai, xr, xi ) bli_sinvscalris( (ar), -(ai), (xr), (xi) ) -#define bli_dinvscaljris( ar, ai, xr, xi ) bli_dinvscalris( (ar), -(ai), (xr), (xi) ) -#define bli_cinvscaljris( ar, ai, xr, xi ) bli_cinvscalris( (ar), -(ai), (xr), (xi) ) -#define bli_zinvscaljris( ar, ai, xr, xi ) bli_zinvscalris( (ar), -(ai), (xr), (xi) ) - -#define bli_scinvscaljris( ar, ai, xr, xi ) bli_scinvscalris( (ar), -(ai), (xr), (xi) ) -#define bli_dzinvscaljris( ar, ai, xr, xi ) bli_dzinvscalris( (ar), -(ai), (xr), (xi) ) - -#endif - diff --git a/frame/include/level0/ri/bli_invscalris.h b/frame/include/level0/ri/bli_invscalris.h deleted file mode 100644 index 1f846ee781..0000000000 --- a/frame/include/level0/ri/bli_invscalris.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_INVSCALRIS_H -#define BLIS_INVSCALRIS_H - -// invscalris - -#define bli_sinvscalris( ar, ai, xr, xi ) \ -{ \ - (xr) /= (ar); \ -} - -#define bli_dinvscalris( ar, ai, xr, xi ) \ -{ \ - (xr) /= (ar); \ -} - -#define bli_cinvscalris( ar, ai, xr, xi ) \ -{ \ - float s = bli_fmaxabs( (ar), (ai) ); \ - float ar_s = (ar) / s; \ - float ai_s = (ai) / s; \ - float xrt = (xr); \ - float temp = ( ar_s * (ar) + ai_s * (ai) ); \ - (xr) = ( (xrt) * ar_s + (xi) * ai_s ) / temp; \ - (xi) = ( (xi) * ar_s - (xrt) * ai_s ) / temp; \ -} - -#define bli_zinvscalris( ar, ai, xr, xi ) \ -{ \ - double s = bli_fmaxabs( (ar), (ai) ); \ - double ar_s = (ar) / s; \ - double ai_s = (ai) / s; \ - double xrt = (xr); \ - double temp = ( ar_s * (ar) + ai_s * (ai) ); \ - (xr) = ( (xrt) * ar_s + (xi) * ai_s ) / temp; \ - (xi) = ( (xi) * ar_s - (xrt) * ai_s ) / temp; \ -} - -#define bli_scinvscalris( ar, ai, xr, xi ) \ -{ \ - (xr) /= (ar); \ - (xi) /= (ar); \ -} - -#define bli_dzinvscalris( ar, ai, xr, xi ) \ -{ \ - (xr) /= (ar); \ - (xi) /= (ar); \ -} - -#endif diff --git a/frame/include/level0/ri/bli_neg2ris.h b/frame/include/level0/ri/bli_neg2ris.h deleted file mode 100644 index 860b144cff..0000000000 --- a/frame/include/level0/ri/bli_neg2ris.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_NEG2RIS_H -#define BLIS_NEG2RIS_H - -// neg2ris - -#define bli_sneg2ris( ar, ai, br, bi ) \ -{ \ - (br) = -(ar); \ -} - -#define bli_dneg2ris( ar, ai, br, bi ) \ -{ \ - (br) = -(ar); \ -} - -#define bli_cneg2ris( ar, ai, br, bi ) \ -{ \ - (br) = -(ar); \ - (bi) = -(ai); \ -} - -#define bli_zneg2ris( ar, ai, br, bi ) \ -{ \ - (br) = -(ar); \ - (bi) = -(ai); \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_scal2jris.h b/frame/include/level0/ri/bli_scal2jris.h deleted file mode 100644 index f3b71ed2ee..0000000000 --- a/frame/include/level0/ri/bli_scal2jris.h +++ /dev/null @@ -1,173 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCAL2JRIS_H -#define BLIS_SCAL2JRIS_H - -// scal2jris - -#define bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr); \ - ( void )ai; ( void )xi; ( void )yi; \ -} - -#define bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr) + (ai) * (xi); \ - (yi) = (ai) * (xr) - (ar) * (xi); \ -} - -#define bli_roscal2jris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr) + (ai) * (xi); \ - ( void )yi; \ -} - -#define bli_crscal2jris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr); \ - (yi) = (ar) * -(xi); \ - ( void )ai; \ -} - -#define bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr); \ - (yi) = (ai) * (xr); \ - ( void )xi; \ -} - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. - -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dssscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cssscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zssscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_sdsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_ddsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cdsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zdsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_scsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dcsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_ccsscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) -#define 
bli_zcsscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_szsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dzsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_czsscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zzsscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dsdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_csdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zsdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_sddscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dddscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cddscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zddscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_scdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dcdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_ccdscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zcdscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_szdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dzdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_czdscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zzdscal2jris( ar, ai, 
xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dscscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cscscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zscscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_sdcscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_ddcscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cdcscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zdcscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_sccscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dccscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cccscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zccscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_szcscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dzcscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_czcscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zzcscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dszscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cszscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( 
ar, ai, xr, xi, yr, yi ) -#define bli_zszscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_sdzscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_ddzscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cdzscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zdzscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_sczscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dczscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cczscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zczscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) - -#define bli_szzscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dzzscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_czzscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zzzscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) - - - -#define bli_sscal2jris( ar, ai, xr, xi, yr, yi ) bli_sssscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_dscal2jris( ar, ai, xr, xi, yr, yi ) bli_dddscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_cscal2jris( ar, ai, xr, xi, yr, yi ) bli_cccscal2jris( ar, ai, xr, xi, yr, yi ) -#define bli_zscal2jris( ar, ai, xr, xi, yr, yi ) bli_zzzscal2jris( ar, ai, xr, xi, yr, yi ) - -#endif - diff --git a/frame/include/level0/ri/bli_scal2ris.h b/frame/include/level0/ri/bli_scal2ris.h deleted file mode 100644 index e30fd9789f..0000000000 --- a/frame/include/level0/ri/bli_scal2ris.h +++ /dev/null @@ -1,173 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCAL2RIS_H -#define BLIS_SCAL2RIS_H - -// scal2ris - -#define bli_rxscal2ris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr); \ - ( void )ai; ( void )xi; ( void )yi; \ -} - -#define bli_cxscal2ris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr) - (ai) * (xi); \ - (yi) = (ai) * (xr) + (ar) * (xi); \ -} - -#define bli_roscal2ris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr) - (ai) * (xi); \ - ( void )yi; \ -} - -#define bli_crscal2ris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr); \ - (yi) = (ar) * (xi); \ - ( void )ai; \ -} - -#define bli_rcscal2ris( ar, ai, xr, xi, yr, yi ) \ -{ \ - (yr) = (ar) * (xr); \ - (yi) = (ai) * (xr); \ - ( void )xi; \ -} - -// Notes: -// - The first char encodes the type of a. -// - The second char encodes the type of x. -// - The third char encodes the type of y. - -// -- (axy) = (??s) ------------------------------------------------------------ - -#define bli_sssscal2ris bli_rxscal2ris -#define bli_dssscal2ris bli_rxscal2ris -#define bli_cssscal2ris bli_rxscal2ris -#define bli_zssscal2ris bli_rxscal2ris - -#define bli_sdsscal2ris bli_rxscal2ris -#define bli_ddsscal2ris bli_rxscal2ris -#define bli_cdsscal2ris bli_rxscal2ris -#define bli_zdsscal2ris bli_rxscal2ris - -#define bli_scsscal2ris bli_rxscal2ris -#define bli_dcsscal2ris bli_rxscal2ris -#define bli_ccsscal2ris bli_roscal2ris -#define bli_zcsscal2ris bli_roscal2ris - -#define bli_szsscal2ris bli_rxscal2ris -#define bli_dzsscal2ris bli_rxscal2ris -#define bli_czsscal2ris bli_roscal2ris -#define bli_zzsscal2ris bli_roscal2ris - -// -- (axy) = (??d) ------------------------------------------------------------ - -#define bli_ssdscal2ris bli_rxscal2ris -#define bli_dsdscal2ris bli_rxscal2ris -#define bli_csdscal2ris bli_rxscal2ris -#define bli_zsdscal2ris bli_rxscal2ris - -#define bli_sddscal2ris bli_rxscal2ris -#define bli_dddscal2ris bli_rxscal2ris -#define bli_cddscal2ris bli_rxscal2ris -#define bli_zddscal2ris bli_rxscal2ris - 
-#define bli_scdscal2ris bli_rxscal2ris -#define bli_dcdscal2ris bli_rxscal2ris -#define bli_ccdscal2ris bli_roscal2ris -#define bli_zcdscal2ris bli_roscal2ris - -#define bli_szdscal2ris bli_rxscal2ris -#define bli_dzdscal2ris bli_rxscal2ris -#define bli_czdscal2ris bli_roscal2ris -#define bli_zzdscal2ris bli_roscal2ris - -// -- (axy) = (??c) ------------------------------------------------------------ - -#define bli_sscscal2ris bli_rxscal2ris -#define bli_dscscal2ris bli_rxscal2ris -#define bli_cscscal2ris bli_rcscal2ris -#define bli_zscscal2ris bli_rcscal2ris - -#define bli_sdcscal2ris bli_rxscal2ris -#define bli_ddcscal2ris bli_rxscal2ris -#define bli_cdcscal2ris bli_rcscal2ris -#define bli_zdcscal2ris bli_rcscal2ris - -#define bli_sccscal2ris bli_crscal2ris -#define bli_dccscal2ris bli_crscal2ris -#define bli_cccscal2ris bli_cxscal2ris -#define bli_zccscal2ris bli_cxscal2ris - -#define bli_szcscal2ris bli_crscal2ris -#define bli_dzcscal2ris bli_crscal2ris -#define bli_czcscal2ris bli_cxscal2ris -#define bli_zzcscal2ris bli_cxscal2ris - -// -- (axy) = (??z) ------------------------------------------------------------ - -#define bli_sszscal2ris bli_rxscal2ris -#define bli_dszscal2ris bli_rxscal2ris -#define bli_cszscal2ris bli_rcscal2ris -#define bli_zszscal2ris bli_rcscal2ris - -#define bli_sdzscal2ris bli_rxscal2ris -#define bli_ddzscal2ris bli_rxscal2ris -#define bli_cdzscal2ris bli_rcscal2ris -#define bli_zdzscal2ris bli_rcscal2ris - -#define bli_sczscal2ris bli_crscal2ris -#define bli_dczscal2ris bli_crscal2ris -#define bli_cczscal2ris bli_cxscal2ris -#define bli_zczscal2ris bli_cxscal2ris - -#define bli_szzscal2ris bli_crscal2ris -#define bli_dzzscal2ris bli_crscal2ris -#define bli_czzscal2ris bli_cxscal2ris -#define bli_zzzscal2ris bli_cxscal2ris - - - -#define bli_sscal2ris bli_sssscal2ris -#define bli_dscal2ris bli_dddscal2ris -#define bli_cscal2ris bli_cccscal2ris -#define bli_zscal2ris bli_zzzscal2ris - -#endif - diff --git 
a/frame/include/level0/ri/bli_scal2ris_mxn.h b/frame/include/level0/ri/bli_scal2ris_mxn.h deleted file mode 100644 index 85b242146b..0000000000 --- a/frame/include/level0/ri/bli_scal2ris_mxn.h +++ /dev/null @@ -1,173 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCAL2RIS_MXN_H -#define BLIS_SCAL2RIS_MXN_H - -// scal2ris_mxn - -BLIS_INLINE void bli_cscal2ris_mxn - ( - const conj_t conjx, - const dim_t m, - const dim_t n, - scomplex* restrict alpha, - scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - scomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t is_y - ) -{ - float* restrict alpha_r = ( float* )alpha; \ - float* restrict alpha_i = ( float* )alpha + 1; \ - float* restrict x_r = ( float* )x; \ - float* restrict x_i = ( float* )x + 1; \ - float* restrict y_r = ( float* )y; \ - float* restrict y_i = ( float* )y + is_y; \ - const dim_t incx2 = 2*rs_x; \ - const dim_t ldx2 = 2*cs_x; \ - - /* Treat the micro-panel as panel_dim x panel_len and column-stored - (unit row stride). */ \ - - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - float* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; - float* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; - float* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; - float* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; - - bli_cscal2jris - ( - *alpha_r, - *alpha_i, - *chi11_r, - *chi11_i, - *psi11_r, - *psi11_i - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - float* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; - float* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; - float* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; - float* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; - - bli_cscal2ris - ( - *alpha_r, - *alpha_i, - *chi11_r, - *chi11_i, - *psi11_r, - *psi11_i - ); - } - } -} - -BLIS_INLINE void bli_zscal2ris_mxn - ( - const conj_t conjx, - const dim_t m, - const dim_t n, - dcomplex* restrict alpha, - dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, - dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t is_y - ) -{ - double* restrict alpha_r = ( double* )alpha; \ - 
double* restrict alpha_i = ( double* )alpha + 1; \ - double* restrict x_r = ( double* )x; \ - double* restrict x_i = ( double* )x + 1; \ - double* restrict y_r = ( double* )y; \ - double* restrict y_i = ( double* )y + is_y; \ - const dim_t incx2 = 2*rs_x; \ - const dim_t ldx2 = 2*cs_x; \ - - /* Treat the micro-panel as panel_dim x panel_len and column-stored - (unit row stride). */ \ - - if ( bli_is_conj( conjx ) ) - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - double* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; - double* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; - double* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; - double* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; - - bli_zscal2jris - ( - *alpha_r, - *alpha_i, - *chi11_r, - *chi11_i, - *psi11_r, - *psi11_i - ); - } - } - else /* if ( bli_is_noconj( conjx ) ) */ - { - for ( dim_t j = 0; j < n; ++j ) - for ( dim_t i = 0; i < m; ++i ) - { - double* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; - double* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; - double* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; - double* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; - - bli_zscal2ris - ( - *alpha_r, - *alpha_i, - *chi11_r, - *chi11_i, - *psi11_r, - *psi11_i - ); - } - } -} - - -#endif diff --git a/frame/include/level0/ri/bli_scalcjris.h b/frame/include/level0/ri/bli_scalcjris.h deleted file mode 100644 index 8050a924b7..0000000000 --- a/frame/include/level0/ri/bli_scalcjris.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCALCJRIS_H -#define BLIS_SCALCJRIS_H - -// scalcjris - -#define bli_sscalcjris( conj, ar, ai, xr, xi ) \ -{ \ - bli_sscalris( (ar), (ai), (xr), (xi) ); \ -} - -#define bli_dscalcjris( conj, ar, ai, xr, xi ) \ -{ \ - bli_dscalris( (ar), (ai), (xr), (xi) ); \ -} - -#define bli_cscalcjris( conj, ar, ai, xr, xi ) \ -{ \ - if ( bli_is_conj( conj ) ) { bli_cscaljris( (ar), (ai), (xr), (xi) ); } \ - else { bli_cscalris( (ar), (ai), (xr), (xi) ); } \ -} - -#define bli_zscalcjris( conj, ar, ai, xr, xi ) \ -{ \ - if ( bli_is_conj( conj ) ) { bli_zscaljris( (ar), (ai), (xr), (xi) ); } \ - else { bli_zscalris( (ar), (ai), (xr), (xi) ); } \ -} - -#define bli_iscalcjris( conj, ar, ai, xr, xi ) \ -{ \ - bli_iscalris( (ar), (xi), (xr), (xi) ); \ -} - -#define bli_scscalcjris( conj, ar, ai, xr, xi ) \ -{ \ - bli_scscalris( (ar), (ai), (xr), (xi) ); \ -} - -#define bli_dzscalcjris( conj, ar, ai, xr, xi ) \ -{ \ - bli_dzscalris( (ar), (ai), (xr), (xi) ); \ -} - -#endif diff --git a/frame/include/level0/ri/bli_scaljris.h b/frame/include/level0/ri/bli_scaljris.h deleted file mode 100644 index 29722c1b52..0000000000 --- a/frame/include/level0/ri/bli_scaljris.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCALJRIS_H -#define BLIS_SCALJRIS_H - -// scaljris - -#define bli_sscaljris( ar, ai, xr, xi ) bli_sscalris( (ar), -(ai), (xr), (xi) ) -#define bli_dscaljris( ar, ai, xr, xi ) bli_dscalris( (ar), -(ai), (xr), (xi) ) -#define bli_cscaljris( ar, ai, xr, xi ) bli_cscalris( (ar), -(ai), (xr), (xi) ) -#define bli_zscaljris( ar, ai, xr, xi ) bli_zscalris( (ar), -(ai), (xr), (xi) ) - -#define bli_scscaljris( ar, ai, xr, xi ) bli_scscalris( (ar), -(ai), (xr), (xi) ) -#define bli_dzscaljris( ar, ai, xr, xi ) bli_dzscalris( (ar), -(ai), (xr), (xi) ) - -#endif - diff --git a/frame/include/level0/ri/bli_scalris.h b/frame/include/level0/ri/bli_scalris.h deleted file mode 100644 index e5eeb19bae..0000000000 --- a/frame/include/level0/ri/bli_scalris.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SCALRIS_H -#define BLIS_SCALRIS_H - -// scalris - -#define bli_sscalris( ar, ai, xr, xi ) \ -{ \ - (xr) = (ar) * (xr); \ -} - -#define bli_dscalris( ar, ai, xr, xi ) \ -{ \ - (xr) = (ar) * (xr); \ -} - -#define bli_cscalris( ar, ai, xr, xi ) \ -{ \ - float yr = (ar) * (xr) - (ai) * (xi); \ - float yi = (ai) * (xr) + (ar) * (xi); \ - (xr) = yr; \ - (xi) = yi; \ -} - -#define bli_zscalris( ar, ai, xr, xi ) \ -{ \ - double yr = (ar) * (xr) - (ai) * (xi); \ - double yi = (ai) * (xr) + (ar) * (xi); \ - (xr) = yr; \ - (xi) = yi; \ -} - -#define bli_scscalris( ar, ai, xr, xi ) \ -{ \ - (xr) = (ar) * (xr); \ - (xi) = (ar) * (xi); \ -} - -#define bli_dzscalris( ar, ai, xr, xi ) \ -{ \ - (xr) = (ar) * (xr); \ - (xi) = (ar) * (xi); \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_scalris_mxn_uplo.h b/frame/include/level0/ri/bli_scalris_mxn_uplo.h deleted file mode 100644 index ed2b7d18e7..0000000000 --- a/frame/include/level0/ri/bli_scalris_mxn_uplo.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SCALRIS_MXN_UPLO_H -#define BLIS_SCALRIS_MXN_UPLO_H - -// scalris_mxn_u - -#define bli_cscalris_mxn_u( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_cscalris( *(ar), \ - *(ai), \ - *((xr) + _i*rs_x + _j*cs_x), \ - *((xi) + _i*rs_x + _j*cs_x) ); \ - } \ - } \ -} - -#define bli_zscalris_mxn_u( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ - { \ - bli_zscalris( *(ar), \ - *(ai), \ - *((xr) + _i*rs_x + _j*cs_x), \ - *((xi) + _i*rs_x + _j*cs_x) ); \ - } \ - } \ -} - -// scalris_mxn_l - -#define bli_cscalris_mxn_l( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_cscalris( *(ar), \ - *(ai), \ - *((xr) + _i*rs_x + _j*cs_x), \ - *((xi) + _i*rs_x + _j*cs_x) ); \ - } \ - } \ -} - -#define bli_zscalris_mxn_l( diagoff, m, n, ar, ai, xr, xi, rs_x, 
cs_x ) \ -{ \ - dim_t _i, _j; \ -\ - for ( _j = 0; _j < n; ++_j ) \ - for ( _i = 0; _i < m; ++_i ) \ - { \ - if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ - { \ - bli_zscalris( *(ar), \ - *(ai), \ - *((xr) + _i*rs_x + _j*cs_x), \ - *((xi) + _i*rs_x + _j*cs_x) ); \ - } \ - } \ -} - -#endif diff --git a/frame/include/level0/ri/bli_set0ris.h b/frame/include/level0/ri/bli_set0ris.h deleted file mode 100644 index a4e0ed47f8..0000000000 --- a/frame/include/level0/ri/bli_set0ris.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SET0RIS_H -#define BLIS_SET0RIS_H - -// set0ris - -#define bli_sset0ris( xr, xi ) bli_scopyris( 0.0F, 0.0F, xr, xi ) -#define bli_dset0ris( xr, xi ) bli_dcopyris( 0.0 , 0.0 , xr, xi ) -#define bli_cset0ris( xr, xi ) bli_ccopyris( 0.0F, 0.0F, xr, xi ) -#define bli_zset0ris( xr, xi ) bli_zcopyris( 0.0 , 0.0 , xr, xi ) - -#endif - diff --git a/frame/include/level0/ri/bli_sqrt2ris.h b/frame/include/level0/ri/bli_sqrt2ris.h deleted file mode 100644 index 06fbe7289a..0000000000 --- a/frame/include/level0/ri/bli_sqrt2ris.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SQRT2RIS_H -#define BLIS_SQRT2RIS_H - -// sqrt2ris - -#define bli_ssqrt2ris( xr, xi, ar, ai ) \ -{ \ - (ar) = sqrtf( (xr) ); \ -} - -#define bli_dsqrt2ris( xr, xi, ar, ai ) \ -{ \ - (ar) = sqrt( (xr) ); \ -} - -#define bli_csqrt2ris( xr, xi, ar, ai ) \ -{ \ - float s = bli_fmaxabs( (xr), (xi) ); \ - float mag; \ - if ( s == 0.0F ) mag = 0.0F; \ - else \ - { \ - mag = sqrtf( s ) * \ - sqrtf( ( (xr) / s ) * (xr) + \ - ( (xi) / s ) * (xi) ); \ - } \ -\ - (ar) = sqrtf( ( mag + (xr) ) / 2.0F ); \ - (ai) = sqrtf( ( mag - (xi) ) / 2.0F ); \ -} - -#define bli_zsqrt2ris( xr, xi, ar, ai ) \ -{ \ - double s = bli_fmaxabs( (xr), (xi) ); \ - double mag; \ - if ( s == 0.0 ) mag = 0.0; \ - else \ - { \ - mag = sqrt( s ) * \ - sqrt( ( (xr) / s ) * (xr) + \ - ( (xi) / s ) * (xi) ); \ - } \ -\ - (ar) = sqrt( ( mag + (xr) ) / 2.0 ); \ - (ai) = sqrt( ( mag - (xi) ) / 2.0 ); \ -} - -#define bli_scsqrt2ris( xr, xi, ar, ai ) \ -{ \ - (ar) = sqrtf( (xr) ); \ - (ai) = 0.0F; \ -} - -#define bli_dzsqrt2ris( xr, xi, ar, ai ) \ -{ \ - (ar) = sqrt( (xr) ); \ - (ai) = 0.0; \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_subjris.h b/frame/include/level0/ri/bli_subjris.h deleted file mode 100644 index 
a35fdbc721..0000000000 --- a/frame/include/level0/ri/bli_subjris.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_SUBJRIS_H -#define BLIS_SUBJRIS_H - -// subjris - -#define bli_ssubjris( ar, ai, xr, xi ) bli_ssubris( (ar), -(ai), (xr), (xi) ) -#define bli_dsubjris( ar, ai, xr, xi ) bli_dsubris( (ar), -(ai), (xr), (xi) ) -#define bli_csubjris( ar, ai, xr, xi ) bli_csubris( (ar), -(ai), (xr), (xi) ) -#define bli_zsubjris( ar, ai, xr, xi ) bli_zsubris( (ar), -(ai), (xr), (xi) ) - -#endif - diff --git a/frame/include/level0/ri/bli_subris.h b/frame/include/level0/ri/bli_subris.h deleted file mode 100644 index 4c340604d6..0000000000 --- a/frame/include/level0/ri/bli_subris.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SUBRIS_H -#define BLIS_SUBRIS_H - -// subris - -#define bli_ssubris( ar, ai, xr, xi ) \ -{ \ - (xr) = (xr) - (ar); \ -} - -#define bli_dsubris( ar, ai, xr, xi ) \ -{ \ - (xr) = (xr) - (ar); \ -} - -#define bli_csubris( ar, ai, xr, xi ) \ -{ \ - (xr) = (xr) - (ar); \ - (xi) = (xi) - (ai); \ -} - -#define bli_zsubris( ar, ai, xr, xi ) \ -{ \ - (xr) = (xr) - (ar); \ - (xi) = (xi) - (ai); \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_swapris.h b/frame/include/level0/ri/bli_swapris.h deleted file mode 100644 index 5b080fa766..0000000000 --- a/frame/include/level0/ri/bli_swapris.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_SWAPRIS_H -#define BLIS_SWAPRIS_H - -// swapris - -#define bli_sswapris( ar, ai, br, bi ) \ -{ \ - float tr, ti; \ -\ - bli_scopyris( (br), (bi), (tr), (ti) ); \ - bli_scopyris( (ar), (ai), (br), (bi) ); \ - bli_scopyris( (tr), (ti), (ar), (ai) ); \ -} - -#define bli_dswapris( ar, ai, br, bi ) \ -{ \ - double tr, ti; \ -\ - bli_dcopyris( (br), (bi), (tr), (ti) ); \ - bli_dcopyris( (ar), (ai), (br), (bi) ); \ - bli_dcopyris( (tr), (ti), (ar), (ai) ); \ -} - -#define bli_cswapris( ar, ai, br, bi ) \ -{ \ - scomplex tr, ti; \ -\ - bli_ccopyris( (br), (bi), (tr), (ti) ); \ - bli_ccopyris( (ar), (ai), (br), (bi) ); \ - bli_ccopyris( (tr), (ti), (ar), (ai) ); \ -} - -#define bli_zswapris( ar, ai, br, bi ) \ -{ \ - dcomplex tr, ti; \ -\ - bli_zcopyris( (br), (bi), (tr), (ti) ); \ - bli_zcopyris( (ar), (ai), (br), (bi) ); \ - bli_zcopyris( (tr), (ti), (ar), (ai) ); \ -} - -#endif - diff --git a/frame/include/level0/ri/bli_xpbyjris.h b/frame/include/level0/ri/bli_xpbyjris.h deleted file 
mode 100644 index e441a2b454..0000000000 --- a/frame/include/level0/ri/bli_xpbyjris.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_XPBYJRIS_H -#define BLIS_XPBYJRIS_H - -// xpbyjris - -#define bli_rxxpbyjris( xr, xi, br, bi, yr, yi ) \ -{ \ - (yr) = (xr) + (br) * (yr); \ -} - -#define bli_cxxpbyjris( xr, xi, br, bi, yr, yi ) \ -{ \ - const __typeof__(yr) yt_r = (xr) + (br) * (yr) - (bi) * (yi); \ - const __typeof__(yi) yt_i = -(xi) + (bi) * (yr) + (br) * (yi); \ - (yr) = yt_r; \ - (yi) = yt_i; \ -} - -#define bli_crxpbyjris( xr, xi, br, bi, yr, yi ) \ -{ \ - const __typeof__(yr) yt_r = (xr) + (br) * (yr); \ - const __typeof__(yi) yt_i = -(xi) + (br) * (yi); \ - (yr) = yt_r; \ - (yi) = yt_i; \ -} - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of b. -// - The third char encodes the type of y. - -// -- (xby) = (??s) ------------------------------------------------------------ - -#define bli_sssxpbyjris bli_rxxpbyjris -#define bli_dssxpbyjris bli_rxxpbyjris -#define bli_cssxpbyjris bli_rxxpbyjris -#define bli_zssxpbyjris bli_rxxpbyjris - -#define bli_sdsxpbyjris bli_rxxpbyjris -#define bli_ddsxpbyjris bli_rxxpbyjris -#define bli_cdsxpbyjris bli_rxxpbyjris -#define bli_zdsxpbyjris bli_rxxpbyjris - -#define bli_scsxpbyjris bli_rxxpbyjris -#define bli_dcsxpbyjris bli_rxxpbyjris -#define bli_ccsxpbyjris bli_rxxpbyjris -#define bli_zcsxpbyjris bli_rxxpbyjris - -#define bli_szsxpbyjris bli_rxxpbyjris -#define bli_dzsxpbyjris bli_rxxpbyjris -#define bli_czsxpbyjris bli_rxxpbyjris -#define bli_zzsxpbyjris bli_rxxpbyjris - -// -- (xby) = (??d) ------------------------------------------------------------ - -#define bli_ssdxpbyjris bli_rxxpbyjris -#define bli_dsdxpbyjris bli_rxxpbyjris -#define bli_csdxpbyjris bli_rxxpbyjris -#define bli_zsdxpbyjris bli_rxxpbyjris - -#define bli_sddxpbyjris bli_rxxpbyjris -#define bli_dddxpbyjris bli_rxxpbyjris -#define bli_cddxpbyjris bli_rxxpbyjris -#define bli_zddxpbyjris bli_rxxpbyjris - -#define bli_scdxpbyjris bli_rxxpbyjris -#define bli_dcdxpbyjris bli_rxxpbyjris -#define bli_ccdxpbyjris 
bli_rxxpbyjris -#define bli_zcdxpbyjris bli_rxxpbyjris - -#define bli_szdxpbyjris bli_rxxpbyjris -#define bli_dzdxpbyjris bli_rxxpbyjris -#define bli_czdxpbyjris bli_rxxpbyjris -#define bli_zzdxpbyjris bli_rxxpbyjris - -// -- (xby) = (??c) ------------------------------------------------------------ - -#define bli_sscxpbyjris bli_rxxpbyjris -#define bli_dscxpbyjris bli_rxxpbyjris -#define bli_cscxpbyjris bli_crxpbyjris -#define bli_zscxpbyjris bli_crxpbyjris - -#define bli_sdcxpbyjris bli_rxxpbyjris -#define bli_ddcxpbyjris bli_rxxpbyjris -#define bli_cdcxpbyjris bli_crxpbyjris -#define bli_zdcxpbyjris bli_crxpbyjris - -#define bli_sccxpbyjris bli_cxxpbyjris -#define bli_dccxpbyjris bli_cxxpbyjris -#define bli_cccxpbyjris bli_cxxpbyjris -#define bli_zccxpbyjris bli_cxxpbyjris - -#define bli_szcxpbyjris bli_cxxpbyjris -#define bli_dzcxpbyjris bli_cxxpbyjris -#define bli_czcxpbyjris bli_cxxpbyjris -#define bli_zzcxpbyjris bli_cxxpbyjris - -// -- (xby) = (??z) ------------------------------------------------------------ - -#define bli_sszxpbyjris bli_rxxpbyjris -#define bli_dszxpbyjris bli_rxxpbyjris -#define bli_cszxpbyjris bli_crxpbyjris -#define bli_zszxpbyjris bli_crxpbyjris - -#define bli_sdzxpbyjris bli_rxxpbyjris -#define bli_ddzxpbyjris bli_rxxpbyjris -#define bli_cdzxpbyjris bli_crxpbyjris -#define bli_zdzxpbyjris bli_crxpbyjris - -#define bli_sczxpbyjris bli_cxxpbyjris -#define bli_dczxpbyjris bli_cxxpbyjris -#define bli_cczxpbyjris bli_cxxpbyjris -#define bli_zczxpbyjris bli_cxxpbyjris - -#define bli_szzxpbyjris bli_cxxpbyjris -#define bli_dzzxpbyjris bli_cxxpbyjris -#define bli_czzxpbyjris bli_cxxpbyjris -#define bli_zzzxpbyjris bli_cxxpbyjris - - - -#define bli_sxpbyjris bli_sssxpbyjris -#define bli_dxpbyjris bli_dddxpbyjris -#define bli_cxpbyjris bli_cccxpbyjris -#define bli_zxpbyjris bli_zzzxpbyjris - -#endif - diff --git a/frame/include/level0/ri/bli_xpbyris.h b/frame/include/level0/ri/bli_xpbyris.h deleted file mode 100644 index 4d693de92f..0000000000 
--- a/frame/include/level0/ri/bli_xpbyris.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_XPBYRIS_H -#define BLIS_XPBYRIS_H - -// xpbyris - -#define bli_rxxpbyris( xr, xi, br, bi, yr, yi ) \ -{ \ - (yr) = (xr) + (br) * (yr); \ -} - -#define bli_cxxpbyris( xr, xi, br, bi, yr, yi ) \ -{ \ - const __typeof__(yr) yt_r = (xr) + (br) * (yr) - (bi) * (yi); \ - const __typeof__(yi) yt_i = (xi) + (bi) * (yr) + (br) * (yi); \ - (yr) = yt_r; \ - (yi) = yt_i; \ -} - -#define bli_crxpbyris( xr, xi, br, bi, yr, yi ) \ -{ \ - const __typeof__(yr) yt_r = (xr) + (br) * (yr); \ - const __typeof__(yi) yt_i = (xi) + (br) * (yi); \ - (yr) = yt_r; \ - (yi) = yt_i; \ -} - -// Notes: -// - The first char encodes the type of x. -// - The second char encodes the type of b. -// - The third char encodes the type of y. - -// -- (xby) = (??s) ------------------------------------------------------------ - -#define bli_sssxpbyris bli_rxxpbyris -#define bli_dssxpbyris bli_rxxpbyris -#define bli_cssxpbyris bli_rxxpbyris -#define bli_zssxpbyris bli_rxxpbyris - -#define bli_sdsxpbyris bli_rxxpbyris -#define bli_ddsxpbyris bli_rxxpbyris -#define bli_cdsxpbyris bli_rxxpbyris -#define bli_zdsxpbyris bli_rxxpbyris - -#define bli_scsxpbyris bli_rxxpbyris -#define bli_dcsxpbyris bli_rxxpbyris -#define bli_ccsxpbyris bli_rxxpbyris -#define bli_zcsxpbyris bli_rxxpbyris - -#define bli_szsxpbyris bli_rxxpbyris -#define bli_dzsxpbyris bli_rxxpbyris -#define bli_czsxpbyris bli_rxxpbyris -#define bli_zzsxpbyris bli_rxxpbyris - -// -- (xby) = (??d) ------------------------------------------------------------ - -#define bli_ssdxpbyris bli_rxxpbyris -#define bli_dsdxpbyris bli_rxxpbyris -#define bli_csdxpbyris bli_rxxpbyris -#define bli_zsdxpbyris bli_rxxpbyris - -#define bli_sddxpbyris bli_rxxpbyris -#define bli_dddxpbyris bli_rxxpbyris -#define bli_cddxpbyris bli_rxxpbyris -#define bli_zddxpbyris bli_rxxpbyris - -#define bli_scdxpbyris bli_rxxpbyris -#define bli_dcdxpbyris bli_rxxpbyris -#define bli_ccdxpbyris bli_rxxpbyris -#define bli_zcdxpbyris bli_rxxpbyris - -#define 
bli_szdxpbyris bli_rxxpbyris -#define bli_dzdxpbyris bli_rxxpbyris -#define bli_czdxpbyris bli_rxxpbyris -#define bli_zzdxpbyris bli_rxxpbyris - -// -- (xby) = (??c) ------------------------------------------------------------ - -#define bli_sscxpbyris bli_rxxpbyris -#define bli_dscxpbyris bli_rxxpbyris -#define bli_cscxpbyris bli_crxpbyris -#define bli_zscxpbyris bli_crxpbyris - -#define bli_sdcxpbyris bli_rxxpbyris -#define bli_ddcxpbyris bli_rxxpbyris -#define bli_cdcxpbyris bli_crxpbyris -#define bli_zdcxpbyris bli_crxpbyris - -#define bli_sccxpbyris bli_cxxpbyris -#define bli_dccxpbyris bli_cxxpbyris -#define bli_cccxpbyris bli_cxxpbyris -#define bli_zccxpbyris bli_cxxpbyris - -#define bli_szcxpbyris bli_cxxpbyris -#define bli_dzcxpbyris bli_cxxpbyris -#define bli_czcxpbyris bli_cxxpbyris -#define bli_zzcxpbyris bli_cxxpbyris - -// -- (xby) = (??z) ------------------------------------------------------------ - -#define bli_sszxpbyris bli_rxxpbyris -#define bli_dszxpbyris bli_rxxpbyris -#define bli_cszxpbyris bli_crxpbyris -#define bli_zszxpbyris bli_crxpbyris - -#define bli_sdzxpbyris bli_rxxpbyris -#define bli_ddzxpbyris bli_rxxpbyris -#define bli_cdzxpbyris bli_crxpbyris -#define bli_zdzxpbyris bli_crxpbyris - -#define bli_sczxpbyris bli_cxxpbyris -#define bli_dczxpbyris bli_cxxpbyris -#define bli_cczxpbyris bli_cxxpbyris -#define bli_zczxpbyris bli_cxxpbyris - -#define bli_szzxpbyris bli_cxxpbyris -#define bli_dzzxpbyris bli_cxxpbyris -#define bli_czzxpbyris bli_cxxpbyris -#define bli_zzzxpbyris bli_cxxpbyris - - - -#define bli_sxpbyris bli_sssxpbyris -#define bli_dxpbyris bli_dddxpbyris -#define bli_cxpbyris bli_cccxpbyris -#define bli_zxpbyris bli_zzzxpbyris - -#endif - diff --git a/frame/util/bli_util_check.c b/frame/util/bli_util_check.c index 3fafb4e50c..370619971f 100644 --- a/frame/util/bli_util_check.c +++ b/frame/util/bli_util_check.c @@ -151,10 +151,33 @@ void PASTEMAC(opname,_check) \ } GENFRONT( eqsc ) + + +#undef GENFRONT +#define GENFRONT( 
opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + const obj_t* chi, \ + const obj_t* psi, \ + const bool* is \ + ) \ +{ \ + bli_l0_xxbsc_check( chi, psi, is ); \ +\ + err_t e_val; \ +\ + e_val = bli_check_real_datatype( bli_obj_dt( chi ) ); \ + bli_check_error_code( e_val ); \ +\ + e_val = bli_check_real_datatype( bli_obj_dt( psi ) ); \ + bli_check_error_code( e_val ); \ +} + GENFRONT( ltsc ) -GENFRONT( ltesc ) +GENFRONT( lesc ) GENFRONT( gtsc ) -GENFRONT( gtesc ) +GENFRONT( gesc ) #undef GENFRONT diff --git a/frame/util/bli_util_check.h b/frame/util/bli_util_check.h index 26986b52cc..3d91a7c96f 100644 --- a/frame/util/bli_util_check.h +++ b/frame/util/bli_util_check.h @@ -130,9 +130,9 @@ void PASTEMAC(opname,_check) \ GENTPROT( eqsc ) GENTPROT( ltsc ) -GENTPROT( ltesc ) +GENTPROT( lesc ) GENTPROT( gtsc ) -GENTPROT( gtesc ) +GENTPROT( gesc ) #undef GENPROT diff --git a/frame/util/bli_util_fpa.c b/frame/util/bli_util_fpa.c index bbba052c62..c577ef0cdc 100644 --- a/frame/util/bli_util_fpa.c +++ b/frame/util/bli_util_fpa.c @@ -89,12 +89,29 @@ PASTEMAC(opname,_qfp)( num_t dt ) \ GENFRONT( eqsc ) GENFRONT( eqv ) GENFRONT( eqm ) -GENFRONT( ltsc ) -GENFRONT( ltesc ) -GENFRONT( gtsc ) -GENFRONT( gtesc ) GENFRONT( fprintv ) GENFRONT( fprintm ) //GENFRONT( printv ) //GENFRONT( printm ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +/* +GENARRAY_FPA( void_fp, opname ); \ +*/ \ +\ +GENARRAYRO_FPA( PASTECH(opname,_vft), \ + PASTECH(opname) ); \ +\ +PASTECH(opname,_vft) \ +PASTEMAC(opname,_qfp)( num_t dt ) \ +{ \ + return PASTECH(opname,_fpa)[ dt ]; \ +} + +GENFRONT( ltsc ) +GENFRONT( lesc ) +GENFRONT( gtsc ) +GENFRONT( gesc ) + diff --git a/frame/util/bli_util_fpa.h b/frame/util/bli_util_fpa.h index 5ee0f4adb3..ee4ce2bd2e 100644 --- a/frame/util/bli_util_fpa.h +++ b/frame/util/bli_util_fpa.h @@ -70,9 +70,9 @@ GENPROT( eqsc ) GENPROT( eqv ) GENPROT( eqm ) GENPROT( ltsc ) -GENPROT( ltesc ) +GENPROT( lesc ) GENPROT( gtsc ) -GENPROT( gtesc ) +GENPROT( gesc ) GENPROT( 
fprintv ) GENPROT( fprintm ) //GENPROT( printv ) diff --git a/frame/util/bli_util_ft.h b/frame/util/bli_util_ft.h index 2bb1943d76..c4af5be8b4 100644 --- a/frame/util/bli_util_ft.h +++ b/frame/util/bli_util_ft.h @@ -248,7 +248,7 @@ typedef void (*PASTECH(ch,opname,tsuf)) \ INSERT_GENTDEF( eqm ) -// ltsc, ltesc, gtsc, gtesc +// ltsc, lesc, gtsc, gesc #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ @@ -261,9 +261,9 @@ typedef void (*PASTECH(ch,opname,tsuf)) \ ); INSERT_GENTDEF( ltsc ) -INSERT_GENTDEF( ltesc ) +INSERT_GENTDEF( lesc ) INSERT_GENTDEF( gtsc ) -INSERT_GENTDEF( gtesc ) +INSERT_GENTDEF( gesc ) #endif // #ifdef BLIS_OAPI_BASIC diff --git a/frame/util/bli_util_oapi.c b/frame/util/bli_util_oapi.c index 4810b6f001..f6a8144213 100644 --- a/frame/util/bli_util_oapi.c +++ b/frame/util/bli_util_oapi.c @@ -395,7 +395,7 @@ void PASTEMAC(opname) \ /* Integer objects are handled separately. */ \ if ( bli_is_int( dt ) ) \ { \ - *is_eq = bli_ieqa( buf_chi, buf_psi ); \ + *is_eq = bli_ieq( buf_chi, buf_psi ); \ return; \ } \ \ @@ -571,9 +571,9 @@ void PASTEMAC(opname) \ } GENFRONT( ltsc ) -GENFRONT( ltesc ) +GENFRONT( lesc ) GENFRONT( gtsc ) -GENFRONT( gtesc ) +GENFRONT( gesc ) #undef GENFRONT diff --git a/frame/util/bli_util_oapi.h b/frame/util/bli_util_oapi.h index 2a1d700d8e..cc159d8a73 100644 --- a/frame/util/bli_util_oapi.h +++ b/frame/util/bli_util_oapi.h @@ -154,9 +154,9 @@ GENPROT( eqsc ) GENPROT( eqv ) GENPROT( eqm ) GENPROT( ltsc ) -GENPROT( ltesc ) +GENPROT( lesc ) GENPROT( gtsc ) -GENPROT( gtesc ) +GENPROT( gesc ) #undef GENPROT diff --git a/frame/util/bli_util_tapi.c b/frame/util/bli_util_tapi.c index c3521f244c..5997828da0 100644 --- a/frame/util/bli_util_tapi.c +++ b/frame/util/bli_util_tapi.c @@ -59,7 +59,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ zero and return early. */ \ if ( bli_zero_dim1( n ) ) \ { \ - PASTEMAC(chr,set0s)( *asum ); \ + bli_tset0s( chr, *asum ); \ return; \ } \ \ @@ -138,7 +138,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ early. 
*/ \ if ( bli_zero_dim1( n ) ) \ { \ - PASTEMAC(chr,set0s)( *norm ); \ + bli_tset0s( chr, *norm ); \ return; \ } \ \ @@ -185,7 +185,7 @@ void PASTEMAC(ch,opname,EX_SUF) \ early. */ \ if ( bli_zero_dim2( m, n ) ) \ { \ - PASTEMAC(chr,set0s)( *norm ); \ + bli_tset0s( chr, *norm ); \ return; \ } \ \ @@ -236,10 +236,10 @@ void PASTEMAC(ch,opname,EX_SUF) \ ctype_r norm; \ \ /* Set the norm to zero. */ \ - PASTEMAC(chr,set0s)( norm ); \ + bli_tset0s( chr, norm ); \ \ /* Iterate at least once, but continue iterating until the norm is not zero. */ \ - while ( PASTEMAC(chr,eq0)( norm ) ) \ + while ( bli_teq0s( chr, norm ) ) \ { \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -295,10 +295,10 @@ void PASTEMAC(ch,opname,EX_SUF) \ ctype_r norm; \ \ /* Set the norm to zero. */ \ - PASTEMAC(chr,set0s)( norm ); \ + bli_tset0s( chr, norm ); \ \ /* Iterate at least once, but continue iterating until the norm is not zero. */ \ - while ( PASTEMAC(chr,eq0)( norm ) ) \ + while ( bli_teq0s( chr, norm ) ) \ { \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. 
*/ \ @@ -393,9 +393,9 @@ void PASTEMAC(ch,opname) \ \ ctype chi_conj; \ \ - PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ + bli_tcopycjs( ch,ch, conjchi, *chi, chi_conj ); \ \ - *is_eq = PASTEMAC(ch,eq)( chi_conj, *psi ); \ + *is_eq = PASTEMAC(t,eqs)( ch,ch,ch, chi_conj, *psi ); \ } INSERT_GENTFUNC_BASIC( eqsc ) @@ -475,8 +475,8 @@ void PASTEMAC(ch,opname) \ INSERT_GENTFUNC_BASIC( eqm ) -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, kername ) \ +#undef GENTFUNCRO +#define GENTFUNCRO( ctype, ch, opname, kername ) \ \ void PASTEMAC(ch,opname) \ ( \ @@ -490,10 +490,10 @@ void PASTEMAC(ch,opname) \ *is = PASTEMAC(ch,kername)( *chi, *psi ); \ } -INSERT_GENTFUNC_BASIC( ltsc, lt ) -INSERT_GENTFUNC_BASIC( ltesc, lte ) -INSERT_GENTFUNC_BASIC( gtsc, gt ) -INSERT_GENTFUNC_BASIC( gtesc, gte ) +INSERT_GENTFUNCRO_BASIC( ltsc, lt ) +INSERT_GENTFUNCRO_BASIC( lesc, le ) +INSERT_GENTFUNCRO_BASIC( gtsc, gt ) +INSERT_GENTFUNCRO_BASIC( gesc, ge ) #undef GENTFUNC diff --git a/frame/util/bli_util_tapi.h b/frame/util/bli_util_tapi.h index 715b22a26c..1b9db64bb8 100644 --- a/frame/util/bli_util_tapi.h +++ b/frame/util/bli_util_tapi.h @@ -202,8 +202,8 @@ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ INSERT_GENTPROT_BASIC( eqm ) -#undef GENTPROT -#define GENTPROT( ctype, ch, opname ) \ +#undef GENTPROTRO +#define GENTPROTRO( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ @@ -212,10 +212,10 @@ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ bool* is \ ); -INSERT_GENTPROT_BASIC( ltsc ) -INSERT_GENTPROT_BASIC( ltesc ) -INSERT_GENTPROT_BASIC( gtsc ) -INSERT_GENTPROT_BASIC( gtesc ) +INSERT_GENTPROTRO_BASIC( ltsc ) +INSERT_GENTPROTRO_BASIC( lesc ) +INSERT_GENTPROTRO_BASIC( gtsc ) +INSERT_GENTPROTRO_BASIC( gesc ) #undef GENTPROT diff --git a/frame/util/bli_util_unb_var1.c b/frame/util/bli_util_unb_var1.c index b3767e6a8b..0546d67860 100644 --- a/frame/util/bli_util_unb_var1.c +++ b/frame/util/bli_util_unb_var1.c @@ -59,26 +59,26 @@ void PASTEMAC(ch,varname) \ dim_t 
i; \ \ /* Initialize the absolute sum accumulator to zero. */ \ - PASTEMAC(chr,set0s)( absum ); \ + bli_tset0s( chr, absum ); \ \ for ( i = 0; i < n; ++i ) \ { \ chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. */ \ - PASTEMAC(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ + bli_tgets( ch,chr, *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ chi1_r = bli_fabs( chi1_r ); \ chi1_i = bli_fabs( chi1_i ); \ \ /* Accumulate the real and imaginary components into absum. */ \ - PASTEMAC(chr,adds)( chi1_r, absum ); \ - PASTEMAC(chr,adds)( chi1_i, absum ); \ + bli_tadds( chr,chr,chr, chi1_r, absum ); \ + bli_tadds( chr,chr,chr, chi1_i, absum ); \ } \ \ /* Store the final value of absum to the output variable. */ \ - PASTEMAC(chr,copys)( absum, *asum ); \ + bli_tcopys( chr,chr, absum, *asum ); \ } INSERT_GENTFUNCR_BASIC( asumv_unb_var1 ) @@ -245,21 +245,21 @@ void PASTEMAC(ch,varname) \ dim_t i; \ \ /* Initialize the absolute sum accumulator to zero. */ \ - PASTEMAC(chr,set0s)( absum ); \ + bli_tset0s( chr, absum ); \ \ for ( i = 0; i < n; ++i ) \ { \ chi1 = x + (i )*incx; \ \ /* Compute the absolute value (or complex magnitude) of chi1. */ \ - PASTEMAC(ch,chr,abval2s)( *chi1, abs_chi1 ); \ + bli_tabval2s( ch,chr,chr, *chi1, abs_chi1 ); \ \ /* Accumulate the absolute value of chi1 into absum. */ \ - PASTEMAC(chr,adds)( abs_chi1, absum ); \ + bli_tadds( chr,chr,chr, abs_chi1, absum ); \ } \ \ /* Store final value of absum to the output variable. */ \ - PASTEMAC(chr,copys)( absum, *norm ); \ + bli_tcopys( chr,chr, absum, *norm ); \ } INSERT_GENTFUNCR_BASIC( norm1v_unb_var1 ) @@ -284,8 +284,8 @@ void PASTEMAC(ch,varname) \ ctype_r sqrt_sumsq; \ \ /* Initialize scale and sumsq to begin the summation. */ \ - PASTEMAC(chr,copys)( *zero, scale ); \ - PASTEMAC(chr,copys)( *one, sumsq ); \ + bli_tcopys( chr,chr, *zero, scale ); \ + bli_tcopys( chr,chr, *one, sumsq ); \ \ /* Compute the sum of the squares of the vector. 
*/ \ PASTEMAC(ch,kername) \ @@ -299,11 +299,11 @@ void PASTEMAC(ch,varname) \ ); \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ - PASTEMAC(chr,sqrt2s)( sumsq, sqrt_sumsq ); \ - PASTEMAC(chr,scals)( scale, sqrt_sumsq ); \ + bli_tsqrt2s( chr,chr,chr, sumsq, sqrt_sumsq ); \ + bli_tscals( chr,chr,chr, scale, sqrt_sumsq ); \ \ /* Store the final value to the output variable. */ \ - PASTEMAC(chr,copys)( sqrt_sumsq, *norm ); \ + bli_tcopys( chr,chr, sqrt_sumsq, *norm ); \ } //INSERT_GENTFUNCR_BASIC( normfv_unb_var1, sumsqv_unb_var1 ) @@ -337,8 +337,8 @@ void PASTEMAC(ch,varname) \ ctype_r sqrt_sumsq; \ \ /* Initialize scale and sumsq to begin the summation. */ \ - PASTEMAC(chr,copys)( *zero, scale ); \ - PASTEMAC(chr,copys)( *one, sumsq ); \ + bli_tcopys( chr,chr, *zero, scale ); \ + bli_tcopys( chr,chr, *one, sumsq ); \ \ /* An optimization: first try to use dotv to compute the sum of the squares of the vector. If no floating-point exceptions @@ -368,13 +368,13 @@ void PASTEMAC(ch,varname) \ rntm \ ); \ \ - PASTEMAC(ch,chr,copys)( sumsqc, sumsq ); \ + bli_tcopys( ch,chr, sumsqc, sumsq ); \ \ f_exp_raised = fetestexcept( FE_OVERFLOW | FE_INVALID );\ \ if ( !f_exp_raised ) \ { \ - PASTEMAC(chr,sqrt2s)( sumsq, *norm ); \ + bli_tsqrt2s( chr,chr,chr, sumsq, *norm ); \ return; \ } \ } \ @@ -391,11 +391,11 @@ void PASTEMAC(ch,varname) \ ); \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ - PASTEMAC(chr,sqrt2s)( sumsq, sqrt_sumsq ); \ - PASTEMAC(chr,scals)( scale, sqrt_sumsq ); \ + bli_tsqrt2s( chr,chr,chr, sumsq, sqrt_sumsq ); \ + bli_tscals( chr,chr,chr, scale, sqrt_sumsq ); \ \ /* Store the final value to the output variable. */ \ - PASTEMAC(chr,copys)( sqrt_sumsq, *norm ); \ + bli_tcopys( chr,chr, sqrt_sumsq, *norm ); \ } #else #define GENTFUNCR( ctype, ctype_r, ch, chr, varname, kername ) \ @@ -416,8 +416,8 @@ void PASTEMAC(ch,varname) \ ctype_r sqrt_sumsq; \ \ /* Initialize scale and sumsq to begin the summation. 
*/ \ - PASTEMAC(chr,copys)( *zero, scale ); \ - PASTEMAC(chr,copys)( *one, sumsq ); \ + bli_tcopys( chr,chr, *zero, scale ); \ + bli_tcopys( chr,chr, *one, sumsq ); \ \ /* Compute the sum of the squares of the vector. */ \ \ @@ -432,11 +432,11 @@ void PASTEMAC(ch,varname) \ ); \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ - PASTEMAC(chr,sqrt2s)( sumsq, sqrt_sumsq ); \ - PASTEMAC(chr,scals)( scale, sqrt_sumsq ); \ + bli_tsqrt2s( chr,chr,chr, sumsq, sqrt_sumsq ); \ + bli_tscals( chr,chr,chr, scale, sqrt_sumsq ); \ \ /* Store the final value to the output variable. */ \ - PASTEMAC(chr,copys)( sqrt_sumsq, *norm ); \ + bli_tcopys( chr,chr, sqrt_sumsq, *norm ); \ } #endif GENTFUNCR( float, float, s, s, normfv_unb_var1, sumsqv_unb_var1 ) @@ -461,28 +461,28 @@ void PASTEMAC(ch,varname) \ dim_t i; \ \ /* Initialize the maximum absolute value to zero. */ \ - PASTEMAC(chr,set0s)( abs_chi1_max ); \ + bli_tset0s( chr, abs_chi1_max ); \ \ for ( i = 0; i < n; ++i ) \ { \ chi1 = x + (i )*incx; \ \ /* Compute the absolute value (or complex magnitude) of chi1. */ \ - PASTEMAC(ch,chr,abval2s)( *chi1, abs_chi1 ); \ + bli_tabval2s( ch,chr,chr, *chi1, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was larger than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ - if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ + if ( abs_chi1_max < abs_chi1 || PASTEMAC(chr,isnan)( abs_chi1 ) ) \ { \ - PASTEMAC(chr,copys)( abs_chi1, abs_chi1_max ); \ + bli_tcopys( chr,chr, abs_chi1, abs_chi1_max ); \ } \ } \ \ /* Store the final value to the output variable. 
*/ \ - PASTEMAC(chr,copys)( abs_chi1_max, *norm ); \ + bli_tcopys( chr,chr, abs_chi1_max, *norm ); \ } INSERT_GENTFUNCR_BASIC( normiv_unb_var1 ) @@ -520,12 +520,12 @@ void PASTEMAC(ch,varname) \ dim_t ij0, n_shift; \ \ /* Initialize the maximum absolute column sum to zero. */ \ - PASTEMAC(chr,set0s)( absum_max ); \ + bli_tset0s( chr, absum_max ); \ \ /* If either dimension is zero, return with absum_max equal to zero. */ \ if ( bli_zero_dim2( m, n ) ) \ { \ - PASTEMAC(chr,copys)( absum_max, *norm ); \ + bli_tcopys( chr,chr, absum_max, *norm ); \ return; \ } \ \ @@ -541,7 +541,7 @@ void PASTEMAC(ch,varname) \ /* If the matrix is zeros, return with absum_max equal to zero. */ \ if ( bli_is_zeros( uplox_eff ) ) \ { \ - PASTEMAC(chr,copys)( absum_max, *norm ); \ + bli_tcopys( chr,chr, absum_max, *norm ); \ return; \ } \ \ @@ -567,9 +567,9 @@ void PASTEMAC(ch,varname) \ \ /* If absum_j is greater than the previous maximum value, then save it. */ \ - if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ + if ( absum_max < absum_j || PASTEMAC(chr,isnan)( absum_j ) ) \ { \ - PASTEMAC(chr,copys)( absum_j, absum_max ); \ + bli_tcopys( chr,chr, absum_j, absum_max ); \ } \ } \ } \ @@ -598,14 +598,14 @@ void PASTEMAC(ch,varname) \ \ /* Handle the diagonal element separately in case it's unit. */ \ - PASTEMAC(ch,chr,abval2s)( *chi1, abval_chi1 ); \ - PASTEMAC(chr,adds)( abval_chi1, absum_j ); \ + bli_tabval2s( ch,chr,chr, *chi1, abval_chi1 ); \ + bli_tadds( chr,chr,chr, abval_chi1, absum_j ); \ \ /* If absum_j is greater than the previous maximum value, then save it. */ \ - if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ + if ( absum_max < absum_j || PASTEMAC(chr,isnan)( absum_j ) ) \ { \ - PASTEMAC(chr,copys)( absum_j, absum_max ); \ + bli_tcopys( chr,chr, absum_j, absum_max ); \ } \ } \ } \ @@ -633,21 +633,21 @@ void PASTEMAC(ch,varname) \ \ /* Handle the diagonal element separately in case it's unit. 
*/ \ - PASTEMAC(ch,chr,abval2s)( *chi1, abval_chi1 ); \ - PASTEMAC(chr,adds)( abval_chi1, absum_j ); \ + bli_tabval2s( ch,chr,chr, *chi1, abval_chi1 ); \ + bli_tadds( chr,chr,chr, abval_chi1, absum_j ); \ \ /* If absum_j is greater than the previous maximum value, then save it. */ \ - if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ + if ( absum_max < absum_j || PASTEMAC(chr,isnan)( absum_j ) ) \ { \ - PASTEMAC(chr,copys)( absum_j, absum_max ); \ + bli_tcopys( chr,chr, absum_j, absum_max ); \ } \ } \ } \ } \ \ /* Store final value of absum_max to the output variable. */ \ - PASTEMAC(chr,copys)( absum_max, *norm ); \ + bli_tcopys( chr,chr, absum_max, *norm ); \ } INSERT_GENTFUNCR_BASIC( norm1m_unb_var1, norm1v_unb_var1 ) @@ -688,7 +688,7 @@ void PASTEMAC(ch,varname) \ /* Return a norm of zero if either dimension is zero. */ \ if ( bli_zero_dim2( m, n ) ) \ { \ - PASTEMAC(chr,set0s)( *norm ); \ + bli_tset0s( chr, *norm ); \ return; \ } \ \ @@ -705,13 +705,13 @@ void PASTEMAC(ch,varname) \ /* Check the effective uplo; if it's zeros, then our norm is zero. */ \ if ( bli_is_zeros( uplox_eff ) ) \ { \ - PASTEMAC(chr,set0s)( *norm ); \ + bli_tset0s( chr, *norm ); \ return; \ } \ \ /* Initialize scale and sumsq to begin the summation. */ \ - PASTEMAC(chr,copys)( *zero_r, scale ); \ - PASTEMAC(chr,copys)( *one_r, sumsq ); \ + bli_tcopys( chr,chr, *zero_r, scale ); \ + bli_tcopys( chr,chr, *one_r, sumsq ); \ \ /* Handle dense and upper/lower storage cases separately. */ \ if ( bli_is_dense( uplox_eff ) ) \ @@ -810,11 +810,11 @@ void PASTEMAC(ch,varname) \ } \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ - PASTEMAC(chr,sqrt2s)( sumsq, sqrt_sumsq ); \ - PASTEMAC(chr,scals)( scale, sqrt_sumsq ); \ + bli_tsqrt2s( chr,chr,chr, sumsq, sqrt_sumsq ); \ + bli_tscals( chr,chr,chr, scale, sqrt_sumsq ); \ \ /* Store the final value to the output variable. 
*/ \ - PASTEMAC(chr,copys)( sqrt_sumsq, *norm ); \ + bli_tcopys( chr,chr, sqrt_sumsq, *norm ); \ } INSERT_GENTFUNCR_BASIC( normfm_unb_var1, sumsqv_unb_var1 ) @@ -880,7 +880,7 @@ void PASTEMAC(ch,varname) \ \ for ( i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,randmac)( *chi1 ); \ + PASTEMAC(t,randmac)( ch, *chi1 ); \ \ chi1 += incx; \ } \ @@ -954,9 +954,9 @@ void PASTEMAC(ch,varname) \ { \ max_m_n = bli_max( m, n ); \ \ - PASTEMAC(d,ch,sets)( max_m_n, 0.0, omega ); \ - PASTEMAC(ch,copys)( *one, beta ); \ - PASTEMAC(ch,invscals)( omega, beta ); \ + bli_tsets( d,ch, max_m_n, 0.0, omega ); \ + bli_tcopys( ch,ch, *one, beta ); \ + bli_tinvscals( ch,ch,ch, omega, beta ); \ \ if ( bli_is_upper( uplox_eff ) ) \ { \ @@ -981,8 +981,8 @@ void PASTEMAC(ch,varname) \ ( void )chi1; \ /* We want positive diagonal elements between 1 and 2. */ \ /* - PASTEMAC(ch,abval2s)( *chi1, *chi1 ); \ - PASTEMAC(ch,adds)( *one, *chi1 ); \ + bli_tabval2s( ch,ch,ch, *chi1, *chi1 ); \ + bli_tadds( ch,ch,ch, *one, *chi1 ); \ */ \ \ /* Scale the super-diagonal elements by 1/max(m,n). */ \ @@ -1022,8 +1022,8 @@ void PASTEMAC(ch,varname) \ ( void )chi1; \ /* We want positive diagonal elements between 1 and 2. */ \ /* - PASTEMAC(ch,abval2s)( *chi1, *chi1 ); \ - PASTEMAC(ch,adds)( *one, *chi1 ); \ + bli_tabval2s( ch,ch,ch, *chi1, *chi1 ); \ + bli_tadds( ch,ch,ch, *one, *chi1 ); \ */ \ \ /* Scale the sub-diagonal elements by 1/max(m,n). */ \ @@ -1075,50 +1075,50 @@ void PASTEMAC(ch,varname) \ the Frobenius norm in netlib LAPACK's ?lassq(). */ \ \ /* Copy scale and sumsq to local variables. */ \ - PASTEMAC(chr,copys)( *scale, scale_r ); \ - PASTEMAC(chr,copys)( *sumsq, sumsq_r ); \ + bli_tcopys( chr,chr, *scale, scale_r ); \ + bli_tcopys( chr,chr, *sumsq, sumsq_r ); \ \ chi1 = x; \ \ for ( i = 0; i < n; ++i ) \ { \ /* Get the real and imaginary components of chi1. 
*/ \ - PASTEMAC(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ + bli_tgets( ch,chr, *chi1, chi1_r, chi1_i ); \ \ abs_chi1_r = bli_fabs( chi1_r ); \ abs_chi1_i = bli_fabs( chi1_i ); \ \ - if ( bli_isnan( abs_chi1_r ) ) \ + if ( PASTEMAC(chr,isnan)( abs_chi1_r ) ) \ { \ sumsq_r = abs_chi1_r; \ scale_r = one_r; \ } \ \ - if ( bli_isnan( abs_chi1_i ) ) \ + if ( PASTEMAC(chr,isnan)( abs_chi1_i ) ) \ { \ sumsq_r = abs_chi1_i; \ scale_r = one_r; \ } \ \ - if ( bli_isnan( sumsq_r ) ) \ + if ( PASTEMAC(chr,isnan)( sumsq_r ) ) \ { \ chi1 += incx; \ continue; \ } \ \ - if ( bli_isinf( abs_chi1_r ) ) \ + if ( PASTEMAC(chr,isinf)( abs_chi1_r ) ) \ { \ sumsq_r = abs_chi1_r; \ scale_r = one_r; \ } \ \ - if ( bli_isinf( abs_chi1_i ) ) \ + if ( PASTEMAC(chr,isinf)( abs_chi1_i ) ) \ { \ sumsq_r = abs_chi1_i; \ scale_r = one_r; \ } \ \ - if ( bli_isinf( sumsq_r ) ) \ + if ( PASTEMAC(chr,isinf)( sumsq_r ) ) \ { \ chi1 += incx; \ continue; \ @@ -1134,7 +1134,7 @@ void PASTEMAC(ch,varname) \ sumsq_r * ( scale_r / abs_chi1_r ) * \ ( scale_r / abs_chi1_r ); \ \ - PASTEMAC(chr,copys)( abs_chi1_r, scale_r ); \ + bli_tcopys( chr,chr, abs_chi1_r, scale_r ); \ } \ else \ { \ @@ -1153,7 +1153,7 @@ void PASTEMAC(ch,varname) \ sumsq_r * ( scale_r / abs_chi1_i ) * \ ( scale_r / abs_chi1_i ); \ \ - PASTEMAC(chr,copys)( abs_chi1_i, scale_r ); \ + bli_tcopys( chr,chr, abs_chi1_i, scale_r ); \ } \ else \ { \ @@ -1166,8 +1166,8 @@ void PASTEMAC(ch,varname) \ } \ \ /* Store final values of scale and sumsq to output variables. 
*/ \ - PASTEMAC(chr,copys)( scale_r, *scale ); \ - PASTEMAC(chr,copys)( sumsq_r, *sumsq ); \ + bli_tcopys( chr,chr, scale_r, *scale ); \ + bli_tcopys( chr,chr, sumsq_r, *sumsq ); \ } INSERT_GENTFUNCR_BASIC( sumsqv_unb_var1 ) @@ -1192,10 +1192,10 @@ bool PASTEMAC(ch,opname) \ \ ctype chi1c; \ \ - if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *chi1, chi1c ); } \ - else { PASTEMAC(ch,copys)( *chi1, chi1c ); } \ + if ( bli_is_conj( conjx ) ) { bli_tcopyjs( ch,ch, *chi1, chi1c ); } \ + else { bli_tcopys( ch,ch, *chi1, chi1c ); } \ \ - if ( !PASTEMAC(ch,eq)( chi1c, *psi1 ) ) \ + if ( !PASTEMAC(t,eqs)( ch,ch,ch, chi1c, *psi1 ) ) \ return FALSE; \ } \ \ @@ -1264,10 +1264,10 @@ bool PASTEMAC(ch,opname) \ ctype* y11 = y1 + (i )*incy; \ ctype x11c; \ \ - if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \ - else { PASTEMAC(ch,copys)( *x11, x11c ); } \ + if ( bli_is_conj( conjx ) ) { bli_tcopyjs( ch,ch, *x11, x11c ); } \ + else { bli_tcopys( ch,ch, *x11, x11c ); } \ \ - if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \ + if ( !PASTEMAC(t,eqs)( ch,ch,ch, x11c, *y11 ) ) \ return FALSE; \ } \ } \ @@ -1289,10 +1289,10 @@ bool PASTEMAC(ch,opname) \ ctype* y11 = y1 + (i )*incy; \ ctype x11c; \ \ - if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \ - else { PASTEMAC(ch,copys)( *x11, x11c ); } \ + if ( bli_is_conj( conjx ) ) { bli_tcopyjs( ch,ch, *x11, x11c ); } \ + else { bli_tcopys( ch,ch, *x11, x11c ); } \ \ - if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \ + if ( !PASTEMAC(t,eqs)( ch,ch,ch, x11c, *y11 ) ) \ return FALSE; \ } \ } \ @@ -1313,10 +1313,10 @@ bool PASTEMAC(ch,opname) \ ctype* y11 = y1 + (i )*incy; \ ctype x11c; \ \ - if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \ - else { PASTEMAC(ch,copys)( *x11, x11c ); } \ + if ( bli_is_conj( conjx ) ) { bli_tcopyjs( ch,ch, *x11, x11c ); } \ + else { bli_tcopys( ch,ch, *x11, x11c ); } \ \ - if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \ + if ( !PASTEMAC(t,eqs)( ch,ch,ch, x11c, *y11 ) ) \ return 
FALSE; \ } \ } \ @@ -1352,7 +1352,7 @@ void PASTEMAC(ch,opname) \ \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,fprints)( file, format, *chi1 ); \ + bli_tfprints( ch, file, format, *chi1 ); \ fprintf( file, "\n" ); \ \ chi1 += incx; \ @@ -1390,7 +1390,7 @@ void PASTEMAC(ch,opname) \ { \ const ctype* chi1 = (( ctype* ) x) + i*rs_x + j*cs_x; \ \ - PASTEMAC(ch,fprints)( file, format, *chi1 ); \ + bli_tfprints( ch, file, format, *chi1 ); \ fprintf( file, " " ); \ } \ \ diff --git a/kernels/armsve/1m/bli_dpackm_armsve256_int_8x10.c b/kernels/armsve/1m/bli_dpackm_armsve256_int_8x10.c index 1665b539c5..521f27577c 100644 --- a/kernels/armsve/1m/bli_dpackm_armsve256_int_8x10.c +++ b/kernels/armsve/1m/bli_dpackm_armsve256_int_8x10.c @@ -79,7 +79,7 @@ void bli_dpackm_armsve256_int_8x10 if ( cdim == mr && cdim_bcast == 1 ) { - if ( bli_deq1( *(( double* )kappa) ) ) + if ( bli_teq1s( d, *(( double* )kappa) ) ) { if ( inca == 1 ) // continous memory. packA style { @@ -183,8 +183,9 @@ void bli_dpackm_armsve256_int_8x10 } else { - bli_dscal2bbs_mxn + bli_tscal2bbs_mxn ( + d,d,d,d, conja, cdim_, n_, @@ -194,11 +195,12 @@ void bli_dpackm_armsve256_int_8x10 ); } - bli_dset0s_edge + bli_tset0s_edge ( + d, cdim_*cdim_bcast, cdim_max*cdim_bcast, n_, n_max_, - p, ldp + (double*)p, ldp ); } diff --git a/kernels/armsve/1m/bli_dpackm_armsve512_asm_16x10.c b/kernels/armsve/1m/bli_dpackm_armsve512_asm_16x10.c index 5981f392ab..ed96082a85 100644 --- a/kernels/armsve/1m/bli_dpackm_armsve512_asm_16x10.c +++ b/kernels/armsve/1m/bli_dpackm_armsve512_asm_16x10.c @@ -66,7 +66,7 @@ void bli_dpackm_armsve512_asm_16x10 const int64_t lda = lda_; const int64_t ldp = ldp_; const bool gs = inca != 1 && lda != 1; - const bool unitk = bli_deq1( *(( double* )kappa) ); + const bool unitk = bli_teq1s( d, *(( double* )kappa) ); // This never would have worked in the first place since GEMM packing used // BLIS_PACKED_ROW_PANELS and BLIS_PACKED_COL_PANELS, but with the removal @@ -566,8 +566,9 @@ void 
bli_dpackm_armsve512_asm_16x10 } else { - bli_dscal2bbs_mxn + bli_tscal2bbs_mxn ( + d,d,d,d, conja, cdim_, n_, @@ -577,10 +578,11 @@ void bli_dpackm_armsve512_asm_16x10 ); } - bli_dset0s_edge + bli_tset0s_edge ( + d, cdim_*cdim_bcast, cdim_max*cdim_bcast, n_, n_max_, - p, ldp + (double*)p, ldp ); } diff --git a/kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c b/kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c index 4242e4efdf..51fa48dd5f 100644 --- a/kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c +++ b/kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c @@ -91,7 +91,7 @@ void bli_dpackm_armv8a_int_6x8 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. - const bool unitk = bli_deq1( *(( double* )kappa) ); + const bool unitk = bli_teq1s( d, *(( double* )kappa) ); // ------------------------------------------------------------------------- @@ -478,8 +478,9 @@ void bli_dpackm_armv8a_int_6x8 } else { - bli_dscal2bbs_mxn + bli_tscal2bbs_mxn ( + d,d,d,d, conja, cdim0, k0, @@ -489,11 +490,12 @@ void bli_dpackm_armv8a_int_6x8 ); } - bli_dset0s_edge + bli_tset0s_edge ( + d, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - p, ldp + (double*)p, ldp ); } diff --git a/kernels/armv8a/1m/bli_packm_armv8a_int_s8x12.c b/kernels/armv8a/1m/bli_packm_armv8a_int_s8x12.c index df1e6178f4..805166327e 100644 --- a/kernels/armv8a/1m/bli_packm_armv8a_int_s8x12.c +++ b/kernels/armv8a/1m/bli_packm_armv8a_int_s8x12.c @@ -94,7 +94,7 @@ void bli_spackm_armv8a_int_8x12 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. 
- const bool unitk = bli_seq1( *(( float* )kappa) ); + const bool unitk = bli_teq1s( s, *(( float* )kappa) ); // ------------------------------------------------------------------------- @@ -617,8 +617,9 @@ void bli_spackm_armv8a_int_8x12 } else { - bli_sscal2bbs_mxn + bli_tscal2bbs_mxn ( + s,s,s,s, conja, cdim0, k0, @@ -628,11 +629,12 @@ void bli_spackm_armv8a_int_8x12 ); } - bli_sset0s_edge + bli_tset0s_edge ( + s, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - p, ldp + (float*)p, ldp ); } diff --git a/kernels/bgq/1/bli_dotv_bgq_int.c b/kernels/bgq/1/bli_dotv_bgq_int.c index 1774850032..c5a0b09013 100644 --- a/kernels/bgq/1/bli_dotv_bgq_int.c +++ b/kernels/bgq/1/bli_dotv_bgq_int.c @@ -53,7 +53,7 @@ void bli_ddotv_bgq_int // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { - PASTEMAC(d,set0s)( *rho ); + bli_tset0s( d, *rho ); return; } // If there is anything that would interfere with our use of aligned diff --git a/kernels/bgq/1f/bli_axpyf_bgq_int.c b/kernels/bgq/1f/bli_axpyf_bgq_int.c index 1bf82380ae..f3b92220b5 100644 --- a/kernels/bgq/1f/bli_axpyf_bgq_int.c +++ b/kernels/bgq/1f/bli_axpyf_bgq_int.c @@ -110,14 +110,14 @@ void bli_daxpyf_bgq_int double chi6 = *(x + 6*incx); double chi7 = *(x + 7*incx); - PASTEMAC(d,d,scals)( *alpha, chi0 ); - PASTEMAC(d,d,scals)( *alpha, chi1 ); - PASTEMAC(d,d,scals)( *alpha, chi2 ); - PASTEMAC(d,d,scals)( *alpha, chi3 ); - PASTEMAC(d,d,scals)( *alpha, chi4 ); - PASTEMAC(d,d,scals)( *alpha, chi5 ); - PASTEMAC(d,d,scals)( *alpha, chi6 ); - PASTEMAC(d,d,scals)( *alpha, chi7 ); + bli_tscals( d,d,d, *alpha, chi0 ); + bli_tscals( d,d,d, *alpha, chi1 ); + bli_tscals( d,d,d, *alpha, chi2 ); + bli_tscals( d,d,d, *alpha, chi3 ); + bli_tscals( d,d,d, *alpha, chi4 ); + bli_tscals( d,d,d, *alpha, chi5 ); + bli_tscals( d,d,d, *alpha, chi6 ); + bli_tscals( d,d,d, *alpha, chi7 ); vector4double a0v, a1v, a2v, a3v, a4v, a5v, a6v, a7v; vector4double yv; diff --git 
a/kernels/haswell/1m/bli_packm_haswell_asm_c3x8.c b/kernels/haswell/1m/bli_packm_haswell_asm_c3x8.c index 87ddb79579..aa67025237 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_c3x8.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_c3x8.c @@ -91,7 +91,7 @@ void bli_cpackm_haswell_asm_3x8 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. - const bool unitk = bli_ceq1( *(( scomplex* )kappa) ); + const bool unitk = bli_teq1s( c, *(( scomplex* )kappa) ); // ------------------------------------------------------------------------- @@ -590,8 +590,9 @@ void bli_cpackm_haswell_asm_3x8 } else { - bli_cscal2bbs_mxn + bli_tscal2bbs_mxn ( + c,c,c,c, conja, cdim0, k0, @@ -601,11 +602,12 @@ void bli_cpackm_haswell_asm_3x8 ); } - bli_cset0s_edge + bli_tset0s_edge ( + c, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - p, ldp + (scomplex*)p, ldp ); } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_d6x8.c b/kernels/haswell/1m/bli_packm_haswell_asm_d6x8.c index ef6d66987f..02aba0cb66 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_d6x8.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_d6x8.c @@ -91,7 +91,7 @@ void bli_dpackm_haswell_asm_6x8 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. 
- const bool unitk = bli_deq1( *(( double* )kappa) ); + const bool unitk = bli_teq1s( d, *(( double* )kappa) ); // ------------------------------------------------------------------------- @@ -588,8 +588,9 @@ void bli_dpackm_haswell_asm_6x8 } else { - bli_dscal2bbs_mxn + bli_tscal2bbs_mxn ( + d,d,d,d, conja, cdim0, k0, @@ -599,11 +600,12 @@ void bli_dpackm_haswell_asm_6x8 ); } - bli_dset0s_edge + bli_tset0s_edge ( + d, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - p, ldp + (double*)p, ldp ); } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_s6x16.c b/kernels/haswell/1m/bli_packm_haswell_asm_s6x16.c index fbab3983db..614a51740a 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_s6x16.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_s6x16.c @@ -91,7 +91,7 @@ void bli_spackm_haswell_asm_6x16 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. - const bool unitk = bli_seq1( *(( float* )kappa) ); + const bool unitk = bli_teq1s( s, *(( float* )kappa) ); // ------------------------------------------------------------------------- @@ -789,8 +789,9 @@ void bli_spackm_haswell_asm_6x16 } else { - bli_sscal2bbs_mxn + bli_tscal2bbs_mxn ( + s,s,s,s, conja, cdim0, k0, @@ -800,11 +801,12 @@ void bli_spackm_haswell_asm_6x16 ); } - bli_sset0s_edge + bli_tset0s_edge ( + s, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - p, ldp + (float*)p, ldp ); } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_z3x4.c b/kernels/haswell/1m/bli_packm_haswell_asm_z3x4.c index e5d9da4f33..6706bacb68 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_z3x4.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_z3x4.c @@ -91,7 +91,7 @@ void bli_zpackm_haswell_asm_3x4 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. 
- const bool unitk = bli_zeq1( *(( dcomplex* )kappa) ); + const bool unitk = bli_teq1s( z, *(( dcomplex* )kappa) ); // ------------------------------------------------------------------------- @@ -592,8 +592,9 @@ void bli_zpackm_haswell_asm_3x4 } else { - bli_zscal2bbs_mxn + bli_tscal2bbs_mxn ( + z,z,z,z, conja, cdim0, k0, @@ -603,11 +604,12 @@ void bli_zpackm_haswell_asm_3x4 ); } - bli_zset0s_edge + bli_tset0s_edge ( + z, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - p, ldp + (dcomplex*)p, ldp ); } diff --git a/kernels/haswell/3/sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c b/kernels/haswell/3/sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c index ca15842c4d..9494ce2362 100644 --- a/kernels/haswell/3/sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c +++ b/kernels/haswell/3/sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c @@ -125,7 +125,7 @@ void PASTEMAC(ch,opname) \ const ctype* bj = b /*[ j*cs_b ]*/ ; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -133,23 +133,23 @@ void PASTEMAC(ch,opname) \ const ctype* aij = &ai[ l*cs_a ]; \ const ctype* bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,dots)( *aij, *bij, ab ); \ + bli_tdots( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(d,eq0)( *beta ) ) \ + else if ( bli_teq0s( d, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ diff --git a/kernels/haswell/3/sup/s6x16/bli_gemmsup_r_haswell_ref_sMx1.c b/kernels/haswell/3/sup/s6x16/bli_gemmsup_r_haswell_ref_sMx1.c index 9966283df9..a3a646342c 100644 --- a/kernels/haswell/3/sup/s6x16/bli_gemmsup_r_haswell_ref_sMx1.c +++ b/kernels/haswell/3/sup/s6x16/bli_gemmsup_r_haswell_ref_sMx1.c @@ -124,7 +124,7 @@ void PASTEMAC(ch,opname) \ const ctype* bj = b /*[ j*cs_b ]*/ ; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -132,23 +132,23 @@ void PASTEMAC(ch,opname) \ const ctype* aij = &ai[ l*cs_a ]; \ const ctype* bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,dots)( *aij, *bij, ab ); \ + bli_tdots( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(d,eq0)( *beta ) ) \ + else if ( bli_teq0s( d, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ diff --git a/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c b/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c index b081ab3d0b..96b57ee77c 100644 --- a/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c +++ b/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c @@ -544,8 +544,9 @@ void bli_dpackm_knl_asm_24x8 } else { - bli_dscal2bbs_mxn + bli_tscal2bbs_mxn ( + d,d,d,d, BLIS_NO_CONJUGATE, cdim, n, @@ -555,10 +556,11 @@ void bli_dpackm_knl_asm_24x8 ); } - bli_dset0s_edge + bli_tset0s_edge ( + d, cdim*cdim_bcast, cdim_max*cdim_bcast, n, n_max, - p, ldp + (double*)p, ldp ); } diff --git a/kernels/knl/1m/bli_spackm_knl_asm_24x16.c b/kernels/knl/1m/bli_spackm_knl_asm_24x16.c index 78b41ae229..7ef428dac2 100644 --- a/kernels/knl/1m/bli_spackm_knl_asm_24x16.c +++ b/kernels/knl/1m/bli_spackm_knl_asm_24x16.c @@ -561,8 +561,9 @@ void bli_spackm_knl_asm_24x16 } else { - bli_sscal2bbs_mxn + bli_tscal2bbs_mxn ( + s,s,s,s, BLIS_NO_CONJUGATE, cdim, n, @@ -572,10 +573,11 @@ void bli_spackm_knl_asm_24x16 ); } - bli_sset0s_edge + bli_tset0s_edge ( + s, cdim*cdim_bcast, cdim_max*cdim_bcast, n, n_max, - p, ldp + (float*)p, ldp ); } diff --git a/kernels/penryn/1/bli_dotv_penryn_int.c b/kernels/penryn/1/bli_dotv_penryn_int.c index 83f44309f3..ca12911fca 100644 --- a/kernels/penryn/1/bli_dotv_penryn_int.c +++ b/kernels/penryn/1/bli_dotv_penryn_int.c @@ -73,7 +73,7 @@ void bli_ddotv_penryn_int // If the vector lengths are zero, set rho to zero and return. 
if ( bli_zero_dim1( n ) ) { - PASTEMAC(d,set0s)( *rho_cast ); + bli_tset0s( d, *rho_cast ); return; } @@ -122,7 +122,7 @@ void bli_ddotv_penryn_int const double* restrict x1 = x_cast; const double* restrict y1 = y_cast; - PASTEMAC(d,set0s)( rho1 ); + bli_tset0s( d, rho1 ); if ( n_pre == 1 ) { @@ -166,5 +166,5 @@ void bli_ddotv_penryn_int } } - PASTEMAC(d,copys)( rho1, *rho_cast ); + bli_tcopys( d,d, rho1, *rho_cast ); } diff --git a/kernels/penryn/1f/bli_axpyf_penryn_int.c b/kernels/penryn/1f/bli_axpyf_penryn_int.c index 3ac75f424e..859c26e26b 100644 --- a/kernels/penryn/1f/bli_axpyf_penryn_int.c +++ b/kernels/penryn/1f/bli_axpyf_penryn_int.c @@ -144,10 +144,10 @@ void bli_daxpyf_penryn_int chi2 = *(x_cast + 2*incx); chi3 = *(x_cast + 3*incx); - PASTEMAC(d,d,scals)( *alpha_cast, chi0 ); - PASTEMAC(d,d,scals)( *alpha_cast, chi1 ); - PASTEMAC(d,d,scals)( *alpha_cast, chi2 ); - PASTEMAC(d,d,scals)( *alpha_cast, chi3 ); + bli_tscals( d,d,d, *alpha_cast, chi0 ); + bli_tscals( d,d,d, *alpha_cast, chi1 ); + bli_tscals( d,d,d, *alpha_cast, chi2 ); + bli_tscals( d,d,d, *alpha_cast, chi3 ); if ( m_pre == 1 ) { diff --git a/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c b/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c index eab3c0bb0a..a65740a93a 100644 --- a/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c +++ b/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c @@ -79,7 +79,7 @@ void bli_ddotaxpyv_penryn_int // If the vector lengths are zero, set rho to zero and return. 
if ( bli_zero_dim1( n ) ) { - PASTEMAC(d,set0s)( *rho_cast ); + bli_tset0s( d, *rho_cast ); return; } @@ -138,7 +138,7 @@ void bli_ddotaxpyv_penryn_int //stepy = 2 * incy; //stepz = 2 * incz; - PASTEMAC(d,set0s)( rho1c ); + bli_tset0s( d, rho1c ); alpha1c = *alpha_cast; diff --git a/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c b/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c index 0148d3f924..96c753aa0d 100644 --- a/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c +++ b/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c @@ -182,15 +182,15 @@ void bli_ddotxaxpyf_penryn_int chi2 = *(x_cast + 2*incx); chi3 = *(x_cast + 3*incx); - PASTEMAC(d,d,scals)( *alpha_cast, chi0 ); - PASTEMAC(d,d,scals)( *alpha_cast, chi1 ); - PASTEMAC(d,d,scals)( *alpha_cast, chi2 ); - PASTEMAC(d,d,scals)( *alpha_cast, chi3 ); - - PASTEMAC(d,set0s)( rho0 ); - PASTEMAC(d,set0s)( rho1 ); - PASTEMAC(d,set0s)( rho2 ); - PASTEMAC(d,set0s)( rho3 ); + bli_tscals( d,d,d, *alpha_cast, chi0 ); + bli_tscals( d,d,d, *alpha_cast, chi1 ); + bli_tscals( d,d,d, *alpha_cast, chi2 ); + bli_tscals( d,d,d, *alpha_cast, chi3 ); + + bli_tset0s( d, rho0 ); + bli_tset0s( d, rho1 ); + bli_tset0s( d, rho2 ); + bli_tset0s( d, rho3 ); if ( m_pre == 1 ) { diff --git a/kernels/penryn/1f/bli_dotxf_penryn_int.c b/kernels/penryn/1f/bli_dotxf_penryn_int.c index 282587b58f..530827ec21 100644 --- a/kernels/penryn/1f/bli_dotxf_penryn_int.c +++ b/kernels/penryn/1f/bli_dotxf_penryn_int.c @@ -157,10 +157,10 @@ void bli_ddotxf_penryn_int const double* restrict x3 = a_cast + 3*lda; const double* restrict y0 = x_cast; - PASTEMAC(d,set0s)( rho0 ); - PASTEMAC(d,set0s)( rho1 ); - PASTEMAC(d,set0s)( rho2 ); - PASTEMAC(d,set0s)( rho3 ); + bli_tset0s( d, rho0 ); + bli_tset0s( d, rho1 ); + bli_tset0s( d, rho2 ); + bli_tset0s( d, rho3 ); if ( m_pre == 1 ) { @@ -269,15 +269,15 @@ void bli_ddotxf_penryn_int } } /* - PASTEMAC(d,d,scals)( *beta_cast, *(y_cast ) ); \ - PASTEMAC(d,d,scals)( *beta_cast, *(y_cast+1) ); \ - PASTEMAC(d,d,scals)( *beta_cast, 
*(y_cast+2) ); \ - PASTEMAC(d,d,scals)( *beta_cast, *(y_cast+3) ); \ - - PASTEMAC(d,d,d,axpys)( *alpha_cast, rho1, *(y_cast ) ); \ - PASTEMAC(d,d,d,axpys)( *alpha_cast, rho2, *(y_cast+1) ); \ - PASTEMAC(d,d,d,axpys)( *alpha_cast, rho3, *(y_cast+2) ); \ - PASTEMAC(d,d,d,axpys)( *alpha_cast, rho4, *(y_cast+3) ); \ + bli_tscals( d,d,d, *beta_cast, *(y_cast ) ); \ + bli_tscals( d,d,d, *beta_cast, *(y_cast+1) ); \ + bli_tscals( d,d,d, *beta_cast, *(y_cast+2) ); \ + bli_tscals( d,d,d, *beta_cast, *(y_cast+3) ); \ + + bli_taxpys( d,d,d,d, *alpha_cast, rho1, *(y_cast ) ); \ + bli_taxpys( d,d,d,d, *alpha_cast, rho2, *(y_cast+1) ); \ + bli_taxpys( d,d,d,d, *alpha_cast, rho3, *(y_cast+2) ); \ + bli_taxpys( d,d,d,d, *alpha_cast, rho4, *(y_cast+3) ); \ */ rho1v.d[0] = rho0; diff --git a/kernels/zen/1/bli_amaxv_zen_int.c b/kernels/zen/1/bli_amaxv_zen_int.c index 028e4d6ba0..e18ecdff99 100644 --- a/kernels/zen/1/bli_amaxv_zen_int.c +++ b/kernels/zen/1/bli_amaxv_zen_int.c @@ -123,17 +123,17 @@ void bli_samaxv_zen_int the behavior of netlib BLAS's i?amax() routines. */ if ( bli_zero_dim1( n ) ) { - PASTEMAC(i,copys)( *zero_i, *index ); + bli_tcopys( i,i, *zero_i, *index ); return; } /* Initialize the index of the maximum absolute value to zero. */ - PASTEMAC(i,copys)( *zero_i, i_max_l ); + bli_tcopys( i,i, *zero_i, i_max_l ); /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ - PASTEMAC(s,copys)( *minus_one, abs_chi1_max ); + bli_tcopys( s,s, *minus_one, abs_chi1_max ); // For non-unit strides, or very small vector lengths, compute with // scalar code. @@ -290,17 +290,17 @@ void bli_damaxv_zen_int the behavior of netlib BLAS's i?amax() routines. */ if ( bli_zero_dim1( n ) ) { - PASTEMAC(i,copys)( *zero_i, *index ); + bli_tcopys( i,i, *zero_i, *index ); return; } /* Initialize the index of the maximum absolute value to zero. 
*/ \ - PASTEMAC(i,copys)( *zero_i, i_max_l ); + bli_tcopys( i,i, *zero_i, i_max_l ); /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ - PASTEMAC(d,copys)( *minus_one, abs_chi1_max ); + bli_tcopys( d,d, *minus_one, abs_chi1_max ); // For non-unit strides, or very small vector lengths, compute with // scalar code. @@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \ dim_t i; \ \ /* Initialize the index of the maximum absolute value to zero. */ \ - PASTEMAC(i,copys)( zero_i, *index ); \ + bli_tcopys( i,i, zero_i, *index ); \ \ /* If the vector length is zero, return early. This directly emulates the behavior of netlib BLAS's i?amax() routines. */ \ @@ -455,30 +455,30 @@ void PASTEMAC(ch,varname) \ /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ \ - PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \ + bli_tcopys( chr,chr, *minus_one, abs_chi1_max ); \ \ if ( incx == 1 ) \ { \ for ( i = 0; i < n; ++i ) \ { \ /* Get the real and imaginary components of chi1. */ \ - PASTEMAC(ch,chr,gets)( x[i], chi1_r, chi1_i ); \ + bli_tgets( ch,chr, x[i], chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ - PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ - PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ + bli_tabval2s( chr,chr,chr, chi1_r, chi1_r ); \ + bli_tabval2s( chr,chr,chr, chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ - PASTEMAC(chr,set0s)( abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ + bli_tset0s( chr, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_r, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen.
This behavior mimics that of LAPACK's ?lange(). */ \ - if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ + if ( abs_chi1_max < abs_chi1 || PASTEMAC(chr,isnan)( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ *index = i; \ @@ -492,23 +492,23 @@ void PASTEMAC(ch,varname) \ ctype* chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. */ \ - PASTEMAC(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ + bli_tgets( ch,chr, *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ - PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ - PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ + bli_tabval2s( chr,chr,chr, chi1_r, chi1_r ); \ + bli_tabval2s( chr,chr,chr, chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ - PASTEMAC(chr,set0s)( abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ + bli_tset0s( chr, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_r, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ - if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ + if ( abs_chi1_max < abs_chi1 || PASTEMAC(chr,isnan)( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ *index = i; \ diff --git a/kernels/zen/1/bli_axpyv_zen_int.c b/kernels/zen/1/bli_axpyv_zen_int.c index 6212f1745a..019cef60ee 100644 --- a/kernels/zen/1/bli_axpyv_zen_int.c +++ b/kernels/zen/1/bli_axpyv_zen_int.c @@ -81,7 +81,7 @@ void bli_saxpyv_zen_int v8sf_t y0v, y1v, y2v, y3v; // If the vector dimension is zero, or if alpha is zero, return early.
- if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq0s( s, *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. @@ -186,7 +186,7 @@ void bli_daxpyv_zen_int v4df_t y0v, y1v, y2v, y3v; // If the vector dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq0s( d, *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. diff --git a/kernels/zen/1/bli_axpyv_zen_int10.c b/kernels/zen/1/bli_axpyv_zen_int10.c index 96b8e5f705..6060b63653 100644 --- a/kernels/zen/1/bli_axpyv_zen_int10.c +++ b/kernels/zen/1/bli_axpyv_zen_int10.c @@ -79,7 +79,7 @@ void bli_saxpyv_zen_int10 __m256 zv[10]; // If the vector dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq0s( s, *alpha ) ) return; // Initialize local pointers. const float* restrict xp = x; @@ -286,7 +286,7 @@ void bli_daxpyv_zen_int10 __m256d zv[10]; // If the vector dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq0s( d, *alpha ) ) return; // Initialize local pointers. const double* restrict xp = x; diff --git a/kernels/zen/1/bli_dotv_zen_int.c b/kernels/zen/1/bli_dotv_zen_int.c index 866817b5d3..90ee38842b 100644 --- a/kernels/zen/1/bli_dotv_zen_int.c +++ b/kernels/zen/1/bli_dotv_zen_int.c @@ -87,7 +87,7 @@ void bli_sdotv_zen_int // If the vector dimension is zero, set rho to zero and return early. 
if ( bli_zero_dim1( n ) ) { - PASTEMAC(s,set0s)( *rho ); + bli_tset0s( s, *rho ); return; } @@ -110,7 +110,7 @@ void bli_sdotv_zen_int const float* restrict yp = y; // Initialize the local scalar rho1 to zero. - PASTEMAC(s,set0s)( rho_l ); + bli_tset0s( s, rho_l ); // Initialize the unrolled iterations' rho vectors to zero. rho0v.v = _mm256_setzero_ps(); @@ -173,7 +173,7 @@ void bli_sdotv_zen_int } // Copy the final result into the output variable. - PASTEMAC(s,copys)( rho_l, *rho ); + bli_tcopys( s,s, rho_l, *rho ); } // ----------------------------------------------------------------------------- @@ -211,7 +211,7 @@ void bli_ddotv_zen_int // If the vector dimension is zero, set rho to zero and return early. if ( bli_zero_dim1( n ) ) { - PASTEMAC(d,set0s)( *rho ); + bli_tset0s( d, *rho ); return; } @@ -234,7 +234,7 @@ void bli_ddotv_zen_int const double* restrict yp = y; // Initialize the local scalar rho1 to zero. - PASTEMAC(d,set0s)( rho_l ); + bli_tset0s( d, rho_l ); // Initialize the unrolled iterations' rho vectors to zero. rho0v.v = _mm256_setzero_pd(); @@ -296,6 +296,6 @@ void bli_ddotv_zen_int } // Copy the final result into the output variable. - PASTEMAC(d,copys)( rho_l, *rho ); + bli_tcopys( d,d, rho_l, *rho ); } diff --git a/kernels/zen/1/bli_dotv_zen_int10.c b/kernels/zen/1/bli_dotv_zen_int10.c index 9d8efdec30..c5211a54fc 100644 --- a/kernels/zen/1/bli_dotv_zen_int10.c +++ b/kernels/zen/1/bli_dotv_zen_int10.c @@ -83,7 +83,7 @@ void bli_sdotv_zen_int10 // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) ) { - PASTEMAC(s,set0s)( *rho ); + bli_tset0s( s, *rho ); return; } @@ -91,7 +91,7 @@ void bli_sdotv_zen_int10 const float* restrict xp = x; const float* restrict yp = y; - PASTEMAC(s,set0s)( rho_l ); + bli_tset0s( s, rho_l ); if ( incx == 1 && incy == 1 ) { @@ -242,7 +242,7 @@ void bli_sdotv_zen_int10 } // Copy the final result into the output variable. 
- PASTEMAC(s,copys)( rho_l, *rho ); + bli_tcopys( s,s, rho_l, *rho ); } // ----------------------------------------------------------------------------- @@ -275,7 +275,7 @@ void bli_ddotv_zen_int10 // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) ) { - PASTEMAC(d,set0s)( *rho ); + bli_tset0s( d, *rho ); return; } @@ -283,7 +283,7 @@ void bli_ddotv_zen_int10 const double* restrict xp = x; const double* restrict yp = y; - PASTEMAC(d,set0s)( rho_l ); + bli_tset0s( d, rho_l ); if ( incx == 1 && incy == 1 ) { @@ -455,6 +455,6 @@ void bli_ddotv_zen_int10 } // Copy the final result into the output variable. - PASTEMAC(d,copys)( rho_l, *rho ); + bli_tcopys( d,d, rho_l, *rho ); } diff --git a/kernels/zen/1/bli_dotxv_zen_int.c b/kernels/zen/1/bli_dotxv_zen_int.c index 3e41be8797..0bb236c31d 100644 --- a/kernels/zen/1/bli_dotxv_zen_int.c +++ b/kernels/zen/1/bli_dotxv_zen_int.c @@ -90,17 +90,17 @@ void bli_sdotxv_zen_int // If beta is zero, initialize rho1 to zero instead of scaling // rho by beta (in case rho contains NaN or Inf). - if ( PASTEMAC(s,eq0)( *beta ) ) + if ( bli_teq0s( s, *beta ) ) { - PASTEMAC(s,set0s)( *rho ); + bli_tset0s( s, *rho ); } else { - PASTEMAC(s,scals)( *beta, *rho ); + bli_tscals( s,s,s, *beta, *rho ); } // If the vector dimension is zero, output rho and return early. - if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq0s( s, *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. @@ -181,7 +181,7 @@ void bli_sdotxv_zen_int } // Accumulate the final result into the output variable. 
- PASTEMAC(s,axpys)( *alpha, rho_l, *rho ); + bli_taxpys( s,s,s,s, *alpha, rho_l, *rho ); } // ----------------------------------------------------------------------------- @@ -222,17 +222,17 @@ void bli_ddotxv_zen_int // If beta is zero, initialize rho1 to zero instead of scaling // rho by beta (in case rho contains NaN or Inf). - if ( PASTEMAC(d,eq0)( *beta ) ) + if ( bli_teq0s( d, *beta ) ) { - PASTEMAC(d,set0s)( *rho ); + bli_tset0s( d, *rho ); } else { - PASTEMAC(d,scals)( *beta, *rho ); + bli_tscals( d,d,d, *beta, *rho ); } // If the vector dimension is zero, output rho and return early. - if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq0s( d, *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. @@ -312,6 +312,6 @@ void bli_ddotxv_zen_int } // Accumulate the final result into the output variable. - PASTEMAC(d,axpys)( *alpha, rho_l, *rho ); + bli_taxpys( d,d,d,d, *alpha, rho_l, *rho ); } diff --git a/kernels/zen/1/bli_scalv_zen_int.c b/kernels/zen/1/bli_scalv_zen_int.c index 4dd8b0b5e3..ac9ce5e6bf 100644 --- a/kernels/zen/1/bli_scalv_zen_int.c +++ b/kernels/zen/1/bli_scalv_zen_int.c @@ -78,10 +78,10 @@ void bli_sscalv_zen_int v8sf_t x0v, x1v, x2v, x3v; // If the vector dimension is zero, or if alpha is unit, return early. - if ( bli_zero_dim1( n ) || PASTEMAC(s,eq1)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq1s( s, *alpha ) ) return; // If alpha is zero, use setv (in case y contains NaN or Inf). - if ( PASTEMAC(s,eq0)( *alpha ) ) + if ( bli_teq0s( s, *alpha ) ) { void* zero = bli_s0; setv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_FLOAT, BLIS_SETV_KER, cntx ); @@ -178,10 +178,10 @@ void bli_dscalv_zen_int v4df_t x0v, x1v, x2v, x3v; // If the vector dimension is zero, or if alpha is unit, return early. 
- if ( bli_zero_dim1( n ) || PASTEMAC(d,eq1)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq1s( d, *alpha ) ) return; // If alpha is zero, use setv (in case y contains NaN or Inf). - if ( PASTEMAC(d,eq0)( *alpha ) ) + if ( bli_teq0s( d, *alpha ) ) { void* zero = bli_d0; setv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_SETV_KER, cntx ); diff --git a/kernels/zen/1/bli_scalv_zen_int10.c b/kernels/zen/1/bli_scalv_zen_int10.c index 06099b8e05..9f5fb2587c 100644 --- a/kernels/zen/1/bli_scalv_zen_int10.c +++ b/kernels/zen/1/bli_scalv_zen_int10.c @@ -78,10 +78,10 @@ void bli_sscalv_zen_int10 __m256 zv[10]; // If the vector dimension is zero, or if alpha is unit, return early. - if ( bli_zero_dim1( n ) || PASTEMAC(s,eq1)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq1s( s, *alpha ) ) return; // If alpha is zero, use setv. - if ( PASTEMAC(s,eq0)( *alpha ) ) + if ( bli_teq0s( s, *alpha ) ) { if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); @@ -274,10 +274,10 @@ void bli_dscalv_zen_int10 __m256d zv[10]; // If the vector dimension is zero, or if alpha is unit, return early. - if ( bli_zero_dim1( n ) || PASTEMAC(d,eq1)( *alpha ) ) return; + if ( bli_zero_dim1( n ) || bli_teq1s( d, *alpha ) ) return; // If alpha is zero, use setv. 
- if ( PASTEMAC(d,eq0)( *alpha ) ) + if ( bli_teq0s( d, *alpha ) ) { if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); diff --git a/kernels/zen/1/bli_swapv_zen_int8.c b/kernels/zen/1/bli_swapv_zen_int8.c index 09ed1cf83e..2d7a380065 100644 --- a/kernels/zen/1/bli_swapv_zen_int8.c +++ b/kernels/zen/1/bli_swapv_zen_int8.c @@ -181,14 +181,14 @@ void bli_sswapv_zen_int8 for ( ; (i + 0) < n; i += 1 ) { - PASTEMAC(s,swaps)( x[i], y[i] ); + bli_tswaps( s,s, x[i], y[i] ); } } else { for ( i = 0; i < n; ++i ) { - PASTEMAC(s,swaps)( (*xp), (*yp) ); + bli_tswaps( s,s, (*xp), (*yp) ); xp += incx; yp += incy; @@ -326,14 +326,14 @@ void bli_dswapv_zen_int8 for ( ; (i + 0) < n; i += 1 ) { - PASTEMAC(d,swaps)( x[i], y[i] ); + bli_tswaps( d,d, x[i], y[i] ); } } else { for ( i = 0; i < n; ++i ) { - PASTEMAC(d,swaps)( (*xp), (*yp) ); + bli_tswaps( d,d, (*xp), (*yp) ); xp += incx; yp += incy; diff --git a/kernels/zen/1f/bli_axpyf_zen_int_4.c b/kernels/zen/1f/bli_axpyf_zen_int_4.c index 4e50b4f1ca..e360c2ade3 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_4.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_4.c @@ -79,7 +79,7 @@ void bli_caxpyf_zen_int_4 } // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_ceq0( *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_teq0s( c, *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. diff --git a/kernels/zen/1f/bli_axpyf_zen_int_5.c b/kernels/zen/1f/bli_axpyf_zen_int_5.c index 78477d3fa1..5f21cfefa3 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_5.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_5.c @@ -99,7 +99,7 @@ void bli_saxpyf_zen_int_5 float chi4; // If either dimension is zero, or if alpha is zero, return early. 
- if ( bli_zero_dim2( m, b_n ) || bli_seq0( *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_teq0s( s, *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -348,7 +348,7 @@ void bli_daxpyf_zen_int_5 double chi4; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_deq0( *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_teq0s( d, *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -597,10 +597,10 @@ void bli_daxpyf_zen_int_16x2 v2df_t a40v, a41v; - v2df_t y4v; + v2df_t y4v; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_deq0( *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_teq0s( d, *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -888,7 +888,7 @@ void bli_daxpyf_zen_int_16x4 v2df_t a40v, a41v, a42v, a43v; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_deq0( *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_teq0s( d, *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. diff --git a/kernels/zen/1f/bli_axpyf_zen_int_8.c b/kernels/zen/1f/bli_axpyf_zen_int_8.c index d495ad4acb..80786f7710 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_8.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_8.c @@ -92,7 +92,7 @@ void bli_saxpyf_zen_int_8 float chi4, chi5, chi6, chi7; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || PASTEMAC(s,eq0)( *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_teq0s( s, *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. 
@@ -107,8 +107,8 @@ void bli_saxpyf_zen_int_8 float* restrict y1 = y + (0 )*incy; float alpha_chi1; - PASTEMAC(s,copycjs)( conjx, *chi1, alpha_chi1 ); - PASTEMAC(s,scals)( *alpha, alpha_chi1 ); + bli_tcopycjs( s,s, conjx, *chi1, alpha_chi1 ); + bli_tscals( s,s,s, *alpha, alpha_chi1 ); f ( @@ -160,14 +160,14 @@ void bli_saxpyf_zen_int_8 chi7 = *( x + 7*incx ); // Scale each chi scalar by alpha. - PASTEMAC(s,scals)( *alpha, chi0 ); - PASTEMAC(s,scals)( *alpha, chi1 ); - PASTEMAC(s,scals)( *alpha, chi2 ); - PASTEMAC(s,scals)( *alpha, chi3 ); - PASTEMAC(s,scals)( *alpha, chi4 ); - PASTEMAC(s,scals)( *alpha, chi5 ); - PASTEMAC(s,scals)( *alpha, chi6 ); - PASTEMAC(s,scals)( *alpha, chi7 ); + bli_tscals( s,s,s, *alpha, chi0 ); + bli_tscals( s,s,s, *alpha, chi1 ); + bli_tscals( s,s,s, *alpha, chi2 ); + bli_tscals( s,s,s, *alpha, chi3 ); + bli_tscals( s,s,s, *alpha, chi4 ); + bli_tscals( s,s,s, *alpha, chi5 ); + bli_tscals( s,s,s, *alpha, chi6 ); + bli_tscals( s,s,s, *alpha, chi7 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_ss( &chi0 ); @@ -295,7 +295,7 @@ void bli_daxpyf_zen_int_8 double chi4, chi5, chi6, chi7; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || PASTEMAC(d,eq0)( *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_teq0s( d, *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -310,8 +310,8 @@ void bli_daxpyf_zen_int_8 double* restrict y1 = y + (0 )*incy; double alpha_chi1; - PASTEMAC(d,copycjs)( conjx, *chi1, alpha_chi1 ); - PASTEMAC(d,scals)( *alpha, alpha_chi1 ); + bli_tcopycjs( d,d, conjx, *chi1, alpha_chi1 ); + bli_tscals( d,d,d, *alpha, alpha_chi1 ); f ( @@ -363,14 +363,14 @@ void bli_daxpyf_zen_int_8 chi7 = *( x + 7*incx ); // Scale each chi scalar by alpha. 
- PASTEMAC(d,scals)( *alpha, chi0 ); - PASTEMAC(d,scals)( *alpha, chi1 ); - PASTEMAC(d,scals)( *alpha, chi2 ); - PASTEMAC(d,scals)( *alpha, chi3 ); - PASTEMAC(d,scals)( *alpha, chi4 ); - PASTEMAC(d,scals)( *alpha, chi5 ); - PASTEMAC(d,scals)( *alpha, chi6 ); - PASTEMAC(d,scals)( *alpha, chi7 ); + bli_tscals( d,d,d, *alpha, chi0 ); + bli_tscals( d,d,d, *alpha, chi1 ); + bli_tscals( d,d,d, *alpha, chi2 ); + bli_tscals( d,d,d, *alpha, chi3 ); + bli_tscals( d,d,d, *alpha, chi4 ); + bli_tscals( d,d,d, *alpha, chi5 ); + bli_tscals( d,d,d, *alpha, chi6 ); + bli_tscals( d,d,d, *alpha, chi7 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_sd( &chi0 ); diff --git a/kernels/zen/1f/bli_dotxf_zen_int_8.c b/kernels/zen/1f/bli_dotxf_zen_int_8.c index db62c3c592..1077d10837 100644 --- a/kernels/zen/1f/bli_dotxf_zen_int_8.c +++ b/kernels/zen/1f/bli_dotxf_zen_int_8.c @@ -82,7 +82,7 @@ void bli_sdotxf_zen_int_8 // If the m dimension is zero, or if alpha is zero, the computation // simplifies to updating y. - if ( bli_zero_dim1( m ) || PASTEMAC(s,eq0)( *alpha ) ) + if ( bli_zero_dim1( m ) || bli_teq0s( s, *alpha ) ) { scalv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_FLOAT, BLIS_SCALV_KER, cntx ); @@ -404,7 +404,7 @@ void bli_sdotxf_zen_int_8 // We know at this point that alpha is nonzero; however, beta may still // be zero. If beta is indeed zero, we must overwrite y rather than scale // by beta (in case y contains NaN or Inf). - if ( PASTEMAC(s,eq0)( *beta ) ) + if ( bli_teq0s( s, *beta ) ) { // Apply alpha to the accumulated dot product in rho: // y := alpha * rho @@ -478,7 +478,7 @@ void bli_ddotxf_zen_int_8 // If the m dimension is zero, or if alpha is zero, the computation // simplifies to updating y. 
- if ( bli_zero_dim1( m ) || PASTEMAC(d,eq0)( *alpha ) ) + if ( bli_zero_dim1( m ) || bli_teq0s( d, *alpha ) ) { scalv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_SCALV_KER, cntx ); @@ -791,7 +791,7 @@ void bli_ddotxf_zen_int_8 // We know at this point that alpha is nonzero; however, beta may still // be zero. If beta is indeed zero, we must overwrite y rather than scale // by beta (in case y contains NaN or Inf). - if ( PASTEMAC(d,eq0)( *beta ) ) + if ( bli_teq0s( d, *beta ) ) { // Apply alpha to the accumulated dot product in rho: // y := alpha * rho diff --git a/kernels/zen/3/bli_gemmt_small.c b/kernels/zen/3/bli_gemmt_small.c index f2fd88de7b..619aa0fff4 100644 --- a/kernels/zen/3/bli_gemmt_small.c +++ b/kernels/zen/3/bli_gemmt_small.c @@ -1586,7 +1586,7 @@ static err_t bli_sgemmt_small } //copy/compute sryk values back to C using SIMD - if ( bli_seq0( *beta_cast ) ) + if ( bli_teq0s( s, *beta_cast ) ) {//just copy in case of beta = 0 dim_t _i, _j, k, _l; if(bli_obj_is_lower(c)) // c is lower @@ -3156,7 +3156,7 @@ static err_t bli_dgemmt_small } //copy/compute sryk values back to C using SIMD - if ( bli_seq0( *beta_cast ) ) + if ( bli_teq0s( d, *beta_cast ) ) {//just copy for beta = 0 dim_t _i, _j, k, _l; if(bli_obj_is_lower(c)) //c is lower @@ -3717,7 +3717,7 @@ static err_t bli_sgemmt_small_atbn } //copy/compute sryk values back to C - if ( bli_seq0( *beta_cast ) ) //when beta is 0, just copy result to C + if ( bli_teq0s( s, *beta_cast ) ) //when beta is 0, just copy result to C { dim_t _i, _j; if(bli_obj_is_lower(c)) //c is lower @@ -4149,7 +4149,7 @@ static err_t bli_dgemmt_small_atbn } //copy/compute sryk values back to C - if ( bli_seq0( *beta_cast ) ) //when beta is 0, just copy result to C + if ( bli_teq0s( d, *beta_cast ) ) //when beta is 0, just copy result to C { dim_t _i, _j; if(bli_obj_is_lower(c)) //c is lower diff --git a/ref_kernels/1/bli_addv_ref.c b/ref_kernels/1/bli_addv_ref.c index c0ef4bda76..7b30aca1a6 100644 ---
a/ref_kernels/1/bli_addv_ref.c +++ b/ref_kernels/1/bli_addv_ref.c @@ -58,14 +58,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,addjs)( x[i], y[i] ); \ + bli_taddjs( ch,ch,ch, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,addjs)( *x, *y ); \ + bli_taddjs( ch,ch,ch, *x, *y ); \ \ x += incx; \ y += incy; \ @@ -79,14 +79,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,adds)( x[i], y[i] ); \ + bli_tadds( ch,ch,ch, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,adds)( *x, *y ); \ + bli_tadds( ch,ch,ch, *x, *y ); \ \ x += incx; \ y += incy; \ diff --git a/ref_kernels/1/bli_amaxv_ref.c b/ref_kernels/1/bli_amaxv_ref.c index 87ef632259..47c012a2a7 100644 --- a/ref_kernels/1/bli_amaxv_ref.c +++ b/ref_kernels/1/bli_amaxv_ref.c @@ -64,17 +64,17 @@ void PASTEMAC(ch,opname,arch,suf) \ the behavior of netlib BLAS's i?amax() routines. */ \ if ( bli_zero_dim1( n ) ) \ { \ - PASTEMAC(i,copys)( *zero_i, *index ); \ + bli_tcopys( i,i, *zero_i, *index ); \ return; \ } \ \ /* Initialize the index of the maximum absolute value to zero. */ \ - PASTEMAC(i,copys)( *zero_i, i_max_l ); \ + bli_tcopys( i,i, *zero_i, i_max_l ); \ \ /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ \ - PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \ + bli_tcopys( chr,chr, *minus_one, abs_chi1_max ); \ \ if ( incx == 1 ) \ { \ @@ -83,23 +83,23 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t i = 0; i < n; ++i ) \ { \ /* Get the real and imaginary components of chi1. */ \ - PASTEMAC(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ + bli_tgets( ch,chr, *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. 
*/ \ - PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ - PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ + bli_tabval2s( chr,chr,chr, chi1_r, chi1_r ); \ + bli_tabval2s( chr,chr,chr, chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ - PASTEMAC(chr,set0s)( abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ + bli_tset0s( chr, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_r, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ - if ( abs_chi1_max < abs_chi1 || ( bli_isnan( abs_chi1 ) && !bli_isnan( abs_chi1_max ) ) ) \ + if ( abs_chi1_max < abs_chi1 || ( PASTEMAC(chr,isnan)( abs_chi1 ) && !PASTEMAC(chr,isnan)( abs_chi1_max ) ) ) \ { \ abs_chi1_max = abs_chi1; \ i_max_l = i; \ @@ -115,23 +115,23 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. */ \ - PASTEMAC(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ + bli_tgets( ch,chr, *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ - PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ - PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ + bli_tabval2s( chr,chr,chr, chi1_r, chi1_r ); \ + bli_tabval2s( chr,chr,chr, chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ - PASTEMAC(chr,set0s)( abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ + bli_tset0s( chr, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_r, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. 
If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ - if ( abs_chi1_max < abs_chi1 || ( bli_isnan( abs_chi1 ) && !bli_isnan( abs_chi1_max ) ) ) \ + if ( abs_chi1_max < abs_chi1 || ( PASTEMAC(chr,isnan)( abs_chi1 ) && !PASTEMAC(chr,isnan)( abs_chi1_max ) ) ) \ { \ abs_chi1_max = abs_chi1; \ i_max_l = i; \ @@ -140,7 +140,7 @@ void PASTEMAC(ch,opname,arch,suf) \ } \ \ /* Store the final index to the output variable. */ \ - PASTEMAC(i,copys)( i_max_l, *index ); \ + bli_tcopys( i,i, i_max_l, *index ); \ } INSERT_GENTFUNCR_BASIC( amaxv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) diff --git a/ref_kernels/1/bli_axpbyv_ref.c b/ref_kernels/1/bli_axpbyv_ref.c index 1c265c8191..d23be20189 100644 --- a/ref_kernels/1/bli_axpbyv_ref.c +++ b/ref_kernels/1/bli_axpbyv_ref.c @@ -55,9 +55,9 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* beta = beta0; \ ctype* y = y0; \ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_teq0s( ch, *alpha ) ) \ { \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* If alpha is zero and beta is zero, set to zero. */ \ \ @@ -77,7 +77,7 @@ void PASTEMAC(ch,opname,arch,suf) \ ); \ return; \ } \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + else if ( bli_teq1s( ch, *beta ) ) \ { \ /* If alpha is zero and beta is one, return. */ \ return; \ @@ -102,9 +102,9 @@ void PASTEMAC(ch,opname,arch,suf) \ } \ \ } \ - else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + else if ( bli_teq1s( ch, *alpha ) ) \ { \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* If alpha is one and beta is zero, use copyv. */ \ \ @@ -122,7 +122,7 @@ void PASTEMAC(ch,opname,arch,suf) \ ); \ return; \ } \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + else if ( bli_teq1s( ch, *beta ) ) \ { \ /* If alpha is one and beta is one, use addv. 
*/ \ \ @@ -162,7 +162,7 @@ void PASTEMAC(ch,opname,arch,suf) \ } \ else \ { \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* If alpha is something else and beta is zero, use scal2v. */ \ \ @@ -181,7 +181,7 @@ void PASTEMAC(ch,opname,arch,suf) \ ); \ return; \ } \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + else if ( bli_teq1s( ch, *beta ) ) \ { \ /* If alpha is something else and beta is one, use axpyv. */ \ \ @@ -211,14 +211,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpbyjs)( *alpha, x[i], *beta, y[i] ); \ + bli_taxpbyjs( ch,ch,ch,ch,ch, *alpha, x[i], *beta, y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpbyjs)( *alpha, *x, *beta, *y ); \ + bli_taxpbyjs( ch,ch,ch,ch,ch, *alpha, *x, *beta, *y ); \ \ x += incx; \ y += incy; \ @@ -232,14 +232,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpbys)( *alpha, x[i], *beta, y[i] ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, x[i], *beta, y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpbys)( *alpha, *x, *beta, *y ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, *x, *beta, *y ); \ \ x += incx; \ y += incy; \ diff --git a/ref_kernels/1/bli_axpyv_ref.c b/ref_kernels/1/bli_axpyv_ref.c index f9ca0fb9dd..03f75f5854 100644 --- a/ref_kernels/1/bli_axpyv_ref.c +++ b/ref_kernels/1/bli_axpyv_ref.c @@ -54,10 +54,10 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype* y = y0; \ \ /* If alpha is zero, return. */ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_teq0s( ch, *alpha ) ) return; \ \ /* If alpha is one, use addv. */ \ - if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + if ( bli_teq1s( ch, *alpha ) ) \ { \ /* Query the context for the kernel function pointer. 
*/ \ const num_t dt = PASTEMAC(ch,type); \ @@ -81,14 +81,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpyjs)( *alpha, x[i], y[i] ); \ + bli_taxpyjs( ch,ch,ch,ch, *alpha, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpyjs)( *alpha, *x, *y ); \ + bli_taxpyjs( ch,ch,ch,ch, *alpha, *x, *y ); \ \ x += incx; \ y += incy; \ @@ -102,14 +102,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpys)( *alpha, x[i], y[i] ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpys)( *alpha, *x, *y ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, *x, *y ); \ \ x += incx; \ y += incy; \ diff --git a/ref_kernels/1/bli_copyv_ref.c b/ref_kernels/1/bli_copyv_ref.c index 0f35f5167e..634ab2caae 100644 --- a/ref_kernels/1/bli_copyv_ref.c +++ b/ref_kernels/1/bli_copyv_ref.c @@ -58,14 +58,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copyjs)( x[i], y[i] ); \ + bli_tcopyjs( ch,ch, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copyjs)( *x, *y ); \ + bli_tcopyjs( ch,ch, *x, *y ); \ \ x += incx; \ y += incy; \ @@ -79,14 +79,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copys)( x[i], y[i] ); \ + bli_tcopys( ch,ch, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copys)( *x, *y ); \ + bli_tcopys( ch,ch, *x, *y ); \ \ x += incx; \ y += incy; \ diff --git a/ref_kernels/1/bli_dotv_ref.c b/ref_kernels/1/bli_dotv_ref.c index 18a195ca2d..1fd07461c9 100644 --- a/ref_kernels/1/bli_dotv_ref.c +++ b/ref_kernels/1/bli_dotv_ref.c @@ -56,11 +56,11 @@ void PASTEMAC(ch,opname,arch,suf) \ \ if ( bli_zero_dim1( n ) ) \ { \ - PASTEMAC(ch,set0s)( *rho ); \ + bli_tset0s( ch, *rho ); \ 
return; \ } \ \ - PASTEMAC(ch,set0s)( dotxy ); \ + bli_tset0s( ch, dotxy ); \ \ conj_t conjx_use = conjx; \ \ @@ -77,14 +77,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,dotjs)( x[i], y[i], dotxy ); \ + bli_tdotjs( ch,ch,ch,ch, x[i], y[i], dotxy ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,dotjs)( *x, *y, dotxy ); \ + bli_tdotjs( ch,ch,ch,ch, *x, *y, dotxy ); \ \ x += incx; \ y += incy; \ @@ -98,14 +98,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,dots)( x[i], y[i], dotxy ); \ + bli_tdots( ch,ch,ch,ch, x[i], y[i], dotxy ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,dots)( *x, *y, dotxy ); \ + bli_tdots( ch,ch,ch,ch, *x, *y, dotxy ); \ \ x += incx; \ y += incy; \ @@ -114,9 +114,9 @@ void PASTEMAC(ch,opname,arch,suf) \ } \ \ if ( bli_is_conj( conjy ) ) \ - PASTEMAC(ch,conjs)( dotxy ); \ + bli_tconjs( ch, dotxy ); \ \ - PASTEMAC(ch,copys)( dotxy, *rho ); \ + bli_tcopys( ch,ch, dotxy, *rho ); \ } INSERT_GENTFUNC_BASIC( dotv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) diff --git a/ref_kernels/1/bli_dotxv_ref.c b/ref_kernels/1/bli_dotxv_ref.c index 8fe1160018..41961bb9b9 100644 --- a/ref_kernels/1/bli_dotxv_ref.c +++ b/ref_kernels/1/bli_dotxv_ref.c @@ -59,19 +59,19 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype dotxy; \ \ /* If beta is zero, clear rho. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,set0s)( *rho ); \ + bli_tset0s( ch, *rho ); \ } \ else \ { \ - PASTEMAC(ch,scals)( *beta, *rho ); \ + bli_tscals( ch,ch,ch, *beta, *rho ); \ } \ \ /* If the vectors are empty or if alpha is zero, return early. 
*/ \ - if ( bli_zero_dim1( n ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_zero_dim1( n ) || bli_teq0s( ch, *alpha ) ) return; \ \ - PASTEMAC(ch,set0s)( dotxy ); \ + bli_tset0s( ch, dotxy ); \ \ /* If y must be conjugated, we do so indirectly by first toggling the effective conjugation of x and then conjugating the resulting dot @@ -88,14 +88,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,dotjs)( x[i], y[i], dotxy ); \ + bli_tdotjs( ch,ch,ch,ch, x[i], y[i], dotxy ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,dotjs)( *x, *y, dotxy ); \ + bli_tdotjs( ch,ch,ch,ch, *x, *y, dotxy ); \ \ x += incx; \ y += incy; \ @@ -109,14 +109,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,dots)( x[i], y[i], dotxy ); \ + bli_tdots( ch,ch,ch,ch, x[i], y[i], dotxy ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,dots)( *x, *y, dotxy ); \ + bli_tdots( ch,ch,ch,ch, *x, *y, dotxy ); \ \ x += incx; \ y += incy; \ @@ -125,9 +125,9 @@ void PASTEMAC(ch,opname,arch,suf) \ } \ \ if ( bli_is_conj( conjy ) ) \ - PASTEMAC(ch,conjs)( dotxy ); \ + bli_tconjs( ch, dotxy ); \ \ - PASTEMAC(ch,axpys)( *alpha, dotxy, *rho ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, dotxy, *rho ); \ } INSERT_GENTFUNC_BASIC( dotxv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) diff --git a/ref_kernels/1/bli_invertv_ref.c b/ref_kernels/1/bli_invertv_ref.c index 1cea1c61ac..a70fcfd99e 100644 --- a/ref_kernels/1/bli_invertv_ref.c +++ b/ref_kernels/1/bli_invertv_ref.c @@ -53,14 +53,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,inverts)( x[i] ); \ + bli_tinverts( ch,ch, x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,inverts)( *x ); \ + bli_tinverts( ch,ch, *x ); \ \ x += incx; \ } \ diff --git a/ref_kernels/1/bli_invscalv_ref.c b/ref_kernels/1/bli_invscalv_ref.c 
index 914c89174b..3eea27127e 100644 --- a/ref_kernels/1/bli_invscalv_ref.c +++ b/ref_kernels/1/bli_invscalv_ref.c @@ -52,28 +52,28 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype* x = x0; \ \ /* If alpha is one, return. */ \ - if ( PASTEMAC(ch,eq1)( *alpha ) ) return; \ + if ( bli_teq1s( ch, *alpha ) ) return; \ \ /* If alpha is zero, inv(alpha) is undefined. Bad user! Return early. */ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_teq0s( ch, *alpha ) ) return; \ \ ctype alpha_conj; \ \ - PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ + bli_tcopycjs( ch,ch, conjalpha, *alpha, alpha_conj ); \ \ if ( incx == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,invscals)( alpha_conj, x[i] ); \ + bli_tinvscals( ch,ch,ch, alpha_conj, x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,invscals)( alpha_conj, *x ); \ + bli_tinvscals( ch,ch,ch, alpha_conj, *x ); \ \ x += incx; \ } \ diff --git a/ref_kernels/1/bli_scal2v_ref.c b/ref_kernels/1/bli_scal2v_ref.c index 4b96f5659e..242272565c 100644 --- a/ref_kernels/1/bli_scal2v_ref.c +++ b/ref_kernels/1/bli_scal2v_ref.c @@ -53,7 +53,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* x = x0; \ ctype* y = y0; \ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_teq0s( ch, *alpha ) ) \ { \ /* If alpha is zero, use setv. */ \ \ @@ -73,7 +73,7 @@ void PASTEMAC(ch,opname,arch,suf) \ ); \ return; \ } \ - else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + else if ( bli_teq1s( ch, *alpha ) ) \ { \ /* If alpha is one, use copyv. 
*/ \ \ @@ -99,14 +99,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,scal2js)( *alpha, x[i], y[i] ); \ + bli_tscal2js( ch,ch,ch,ch, *alpha, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,scal2js)( *alpha, *x, *y ); \ + bli_tscal2js( ch,ch,ch,ch, *alpha, *x, *y ); \ \ x += incx; \ y += incy; \ @@ -120,14 +120,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, x[i], y[i] ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, *x, *y ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, *x, *y ); \ \ x += incx; \ y += incy; \ diff --git a/ref_kernels/1/bli_scalv_ref.c b/ref_kernels/1/bli_scalv_ref.c index 8e9a1ec987..5d8d396364 100644 --- a/ref_kernels/1/bli_scalv_ref.c +++ b/ref_kernels/1/bli_scalv_ref.c @@ -52,10 +52,10 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype* x = x0; \ \ /* If alpha is one, return. */ \ - if ( PASTEMAC(ch,eq1)( *alpha ) ) return; \ + if ( bli_teq1s( ch, *alpha ) ) return; \ \ /* If alpha is zero, use setv. 
*/ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_teq0s( ch, *alpha ) ) \ { \ const ctype* zero = PASTEMAC(ch,0); \ \ @@ -76,21 +76,21 @@ void PASTEMAC(ch,opname,arch,suf) \ \ ctype alpha_conj; \ \ - PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ + bli_tcopycjs( ch,ch, conjalpha, *alpha, alpha_conj ); \ \ if ( incx == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,scals)( alpha_conj, x[i] ); \ + bli_tscals( ch,ch,ch, alpha_conj, x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,scals)( alpha_conj, *x ); \ + bli_tscals( ch,ch,ch, alpha_conj, *x ); \ \ x += incx; \ } \ diff --git a/ref_kernels/1/bli_setv_ref.c b/ref_kernels/1/bli_setv_ref.c index 8d945f618e..197a0b73de 100644 --- a/ref_kernels/1/bli_setv_ref.c +++ b/ref_kernels/1/bli_setv_ref.c @@ -51,21 +51,21 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* alpha = alpha0; \ ctype* x = x0; \ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + if ( bli_teq0s( ch, *alpha ) ) \ { \ if ( incx == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,set0s)( x[i] ); \ + bli_tset0s( ch, x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,set0s)( *x ); \ + bli_tset0s( ch, *x ); \ \ x += incx; \ } \ @@ -75,21 +75,21 @@ void PASTEMAC(ch,opname,arch,suf) \ { \ ctype alpha_conj; \ \ - PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ + bli_tcopycjs( ch,ch, conjalpha, *alpha, alpha_conj ); \ \ if ( incx == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copys)( alpha_conj, x[i] ); \ + bli_tcopys( ch,ch, alpha_conj, x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copys)( alpha_conj, *x ); \ + bli_tcopys( ch,ch, alpha_conj, *x ); \ \ x += incx; \ } \ diff --git a/ref_kernels/1/bli_subv_ref.c b/ref_kernels/1/bli_subv_ref.c index d43d960337..b60a0c8004 100644 --- a/ref_kernels/1/bli_subv_ref.c +++ b/ref_kernels/1/bli_subv_ref.c @@ -58,14 +58,14 @@ 
void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,subjs)( x[i], y[i] ); \ + bli_tsubjs( ch,ch,ch, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,subjs)( *x, *y ); \ + bli_tsubjs( ch,ch,ch, *x, *y ); \ \ x += incx; \ y += incy; \ @@ -79,14 +79,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,subs)( x[i], y[i] ); \ + bli_tsubs( ch,ch,ch, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,subs)( *x, *y ); \ + bli_tsubs( ch,ch,ch, *x, *y ); \ \ x += incx; \ y += incy; \ diff --git a/ref_kernels/1/bli_swapv_ref.c b/ref_kernels/1/bli_swapv_ref.c index f01d0d09f7..1739464c05 100644 --- a/ref_kernels/1/bli_swapv_ref.c +++ b/ref_kernels/1/bli_swapv_ref.c @@ -55,14 +55,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,swaps)( x[i], y[i] ); \ + bli_tswaps( ch,ch, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,swaps)( *x, *y ); \ + bli_tswaps( ch,ch, *x, *y ); \ \ x += incx; \ y += incy; \ diff --git a/ref_kernels/1/bli_xpbyv_ref.c b/ref_kernels/1/bli_xpbyv_ref.c index 02c0cd14d8..b7b584fb05 100644 --- a/ref_kernels/1/bli_xpbyv_ref.c +++ b/ref_kernels/1/bli_xpbyv_ref.c @@ -54,7 +54,7 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype* y = y0; \ \ /* If beta is zero, use copyv. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ @@ -71,7 +71,7 @@ void PASTEMAC(ch,opname,arch,suf) \ return; \ } \ /* If alpha is one, use addv. */ \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + else if ( bli_teq1s( ch, *beta ) ) \ { \ /* Query the context for the kernel function pointer. 
*/ \ const num_t dt = PASTEMAC(ch,type); \ @@ -95,14 +95,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,xpbyjs)( x[i], *beta, y[i] ); \ + bli_txpbyjs( ch,ch,ch,ch, x[i], *beta, y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,xpbyjs)( *x, *beta, *y ); \ + bli_txpbyjs( ch,ch,ch,ch, *x, *beta, *y ); \ \ x += incx; \ y += incy; \ @@ -116,14 +116,14 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,xpbys)( x[i], *beta, y[i] ); \ + bli_txpbys( ch,ch,ch,ch, x[i], *beta, y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,xpbys)( *x, *beta, *y ); \ + bli_txpbys( ch,ch,ch,ch, *x, *beta, *y ); \ \ x += incx; \ y += incy; \ diff --git a/ref_kernels/1f/bli_axpy2v_ref.c b/ref_kernels/1f/bli_axpy2v_ref.c index 8b5b2cbbb7..9f46577b0b 100644 --- a/ref_kernels/1f/bli_axpy2v_ref.c +++ b/ref_kernels/1f/bli_axpy2v_ref.c @@ -70,8 +70,8 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpys)( *alphax, x[i], z[i] ); \ - PASTEMAC(ch,axpys)( *alphay, y[i], z[i] ); \ + bli_taxpys( ch,ch,ch,ch, *alphax, x[i], z[i] ); \ + bli_taxpys( ch,ch,ch,ch, *alphay, y[i], z[i] ); \ } \ } \ else /* if ( bli_is_conj( conjy ) ) */ \ @@ -79,9 +79,9 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,axpys)( *alphax, x[i], z[i] ); \ - PASTEMAC(ch,copyjs)( y[i], psic ); \ - PASTEMAC(ch,axpys)( *alphay, psic, z[i] ); \ + bli_taxpys( ch,ch,ch,ch, *alphax, x[i], z[i] ); \ + bli_tcopyjs( ch,ch, y[i], psic ); \ + bli_taxpys( ch,ch,ch,ch, *alphay, psic, z[i] ); \ } \ } \ } \ @@ -92,9 +92,9 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copyjs)( x[i], chic ); \ - PASTEMAC(ch,axpys)( *alphax, chic, z[i] ); \ - PASTEMAC(ch,axpys)( *alphay, y[i], z[i] ); \ + bli_tcopyjs( ch,ch, 
x[i], chic ); \ + bli_taxpys( ch,ch,ch,ch, *alphax, chic, z[i] ); \ + bli_taxpys( ch,ch,ch,ch, *alphay, y[i], z[i] ); \ } \ } \ else /* if ( bli_is_conj( conjy ) ) */ \ @@ -102,10 +102,10 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ - PASTEMAC(ch,copyjs)( x[i], chic ); \ - PASTEMAC(ch,axpys)( *alphax, chic, z[i] ); \ - PASTEMAC(ch,copyjs)( y[i], psic ); \ - PASTEMAC(ch,axpys)( *alphay, psic, z[i] ); \ + bli_tcopyjs( ch,ch, x[i], chic ); \ + bli_taxpys( ch,ch,ch,ch, *alphax, chic, z[i] ); \ + bli_tcopyjs( ch,ch, y[i], psic ); \ + bli_taxpys( ch,ch,ch,ch, *alphay, psic, z[i] ); \ } \ } \ } \ diff --git a/ref_kernels/1f/bli_axpyf_ref.c b/ref_kernels/1f/bli_axpyf_ref.c index 233c64fc21..f86f5f46da 100644 --- a/ref_kernels/1f/bli_axpyf_ref.c +++ b/ref_kernels/1f/bli_axpyf_ref.c @@ -67,13 +67,13 @@ void PASTEMAC(ch,opname,arch,suf) \ { \ PRAGMA_SIMD \ for ( dim_t j = 0; j < ff; ++j ) \ - PASTEMAC(ch,scal2js)( *alpha, x[j], ax[j] ); \ + bli_tscal2js( ch,ch,ch,ch, *alpha, x[j], ax[j] ); \ } \ else \ { \ PRAGMA_SIMD \ for ( dim_t j = 0; j < ff; ++j ) \ - PASTEMAC(ch,scal2s)( *alpha, x[j], ax[j] ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, x[j], ax[j] ); \ } \ \ /* Accumulate ff separate axpyv's into y. 
*/ \ @@ -83,7 +83,7 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < ff; ++j ) \ { \ - PASTEMAC(ch,axpys)( ax[j], a[i + j*lda], y[i] ); \ + bli_taxpys( ch,ch,ch,ch, ax[j], a[i + j*lda], y[i] ); \ } \ } \ else \ @@ -92,7 +92,7 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < ff; ++j ) \ { \ - PASTEMAC(ch,axpyjs)( ax[j], a[i + j*lda], y[i] ); \ + bli_taxpyjs( ch,ch,ch,ch, ax[j], a[i + j*lda], y[i] ); \ } \ } \ } \ @@ -110,8 +110,8 @@ void PASTEMAC(ch,opname,arch,suf) \ \ ctype alpha_chi1; \ \ - PASTEMAC(ch,copycjs)( conjx, *chi1, alpha_chi1 ); \ - PASTEMAC(ch,scals)( *alpha, alpha_chi1 ); \ + bli_tcopycjs( ch,ch, conjx, *chi1, alpha_chi1 ); \ + bli_tscals( ch,ch,ch, *alpha, alpha_chi1 ); \ \ kfp_av \ ( \ diff --git a/ref_kernels/1f/bli_dotaxpyv_ref.c b/ref_kernels/1f/bli_dotaxpyv_ref.c index fe558ba3a5..20b1b0e873 100644 --- a/ref_kernels/1f/bli_dotaxpyv_ref.c +++ b/ref_kernels/1f/bli_dotaxpyv_ref.c @@ -67,7 +67,7 @@ void PASTEMAC(ch,opname,arch,suf) \ conj_t conjxt_use = conjxt; \ ctype dotxy; \ \ - PASTEMAC(ch,set0s)( dotxy ); \ + bli_tset0s( ch, dotxy ); \ \ if ( bli_is_conj( conjy ) ) \ bli_toggle_conj( &conjxt_use ); \ @@ -77,8 +77,8 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ { \ - PASTEMAC(ch,dots)( x[i], y[i], dotxy ); \ - PASTEMAC(ch,axpys)( *alpha, x[i], z[i] ); \ + bli_tdots( ch,ch,ch,ch, x[i], y[i], dotxy ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, x[i], z[i] ); \ } \ } \ else /* bli_is_conj( conjxt_use ) ) */ \ @@ -86,22 +86,22 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ { \ - PASTEMAC(ch,dotjs)( x[i], y[i], dotxy ); \ - PASTEMAC(ch,axpys)( *alpha, x[i], z[i] ); \ + bli_tdotjs( ch,ch,ch,ch, x[i], y[i], dotxy ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, x[i], z[i] ); \ } \ } \ \ if ( bli_is_conj( conjy ) ) \ - PASTEMAC(ch,conjs)( dotxy ); \ + bli_tconjs( ch, dotxy ); \ \ - 
PASTEMAC(ch,copys)( dotxy, *rho ); \ + bli_tcopys( ch,ch, dotxy, *rho ); \ } \ else /* bli_is_conj( conjx ) ) */ \ { \ conj_t conjxt_use = conjxt; \ ctype dotxy; \ \ - PASTEMAC(ch,set0s)( dotxy ); \ + bli_tset0s( ch, dotxy ); \ \ if ( bli_is_conj( conjy ) ) \ bli_toggle_conj( &conjxt_use ); \ @@ -111,8 +111,8 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ { \ - PASTEMAC(ch,dots)( x[i], y[i], dotxy ); \ - PASTEMAC(ch,axpyjs)( *alpha, x[i], z[i] ); \ + bli_tdots( ch,ch,ch,ch, x[i], y[i], dotxy ); \ + bli_taxpyjs( ch,ch,ch,ch, *alpha, x[i], z[i] ); \ } \ } \ else /* bli_is_conj( conjxt_use ) ) */ \ @@ -120,15 +120,15 @@ void PASTEMAC(ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ { \ - PASTEMAC(ch,dotjs)( x[i], y[i], dotxy ); \ - PASTEMAC(ch,axpyjs)( *alpha, x[i], z[i] ); \ + bli_tdotjs( ch,ch,ch,ch, x[i], y[i], dotxy ); \ + bli_taxpyjs( ch,ch,ch,ch, *alpha, x[i], z[i] ); \ } \ } \ \ if ( bli_is_conj( conjy ) ) \ - PASTEMAC(ch,conjs)( dotxy ); \ + bli_tconjs( ch, dotxy ); \ \ - PASTEMAC(ch,copys)( dotxy, *rho ); \ + bli_tcopys( ch,ch, dotxy, *rho ); \ } \ } \ else \ diff --git a/ref_kernels/1f/bli_dotxaxpyf_ref.c b/ref_kernels/1f/bli_dotxaxpyf_ref.c index 6cfa5168c5..83d4be89c4 100644 --- a/ref_kernels/1f/bli_dotxaxpyf_ref.c +++ b/ref_kernels/1f/bli_dotxaxpyf_ref.c @@ -75,33 +75,33 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype ax[ ff ]; \ \ /* If beta is zero, clear y. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ - for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,set0s)( y[i] ); \ + for ( dim_t i = 0; i < ff; ++i ) bli_tset0s( ch, y[i] ); \ } \ else \ { \ - for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,scals)( *beta, y[i] ); \ + for ( dim_t i = 0; i < ff; ++i ) bli_tscals( ch,ch,ch, *beta, y[i] ); \ } \ \ /* If the vectors are empty or if alpha is zero, return early. 
*/ \ - if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_zero_dim1( m ) || bli_teq0s( ch, *alpha ) ) return; \ \ /* Initialize r vector to 0. */ \ - for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,set0s)( r[i] ); \ + for ( dim_t i = 0; i < ff; ++i ) bli_tset0s( ch, r[i] ); \ \ /* Scale x by alpha, storing to a temporary array ax. */ \ if ( bli_is_conj( conjx ) ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < ff; ++i ) \ - PASTEMAC(ch,scal2js)( *alpha, x[i], ax[i] ); \ + bli_tscal2js( ch,ch,ch,ch, *alpha, x[i], ax[i] ); \ } \ else \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < ff; ++i ) \ - PASTEMAC(ch,scal2s)( *alpha, x[i], ax[i] ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, x[i], ax[i] ); \ } \ \ /* If a must be conjugated, we do so indirectly by first toggling the @@ -120,8 +120,8 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ - PASTEMAC(ch,axpys)( a[p + i*lda], w[p], r[i] ); \ - PASTEMAC(ch,axpys)( ax[i], a[p + i*lda], z[p] ); \ + bli_taxpys( ch,ch,ch,ch, a[p + i*lda], w[p], r[i] ); \ + bli_taxpys( ch,ch,ch,ch, ax[i], a[p + i*lda], z[p] ); \ } \ } \ else \ @@ -130,8 +130,8 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ - PASTEMAC(ch,axpys)( a[p + i*lda], w[p], r[i] ); \ - PASTEMAC(ch,axpyjs)( ax[i], a[p + i*lda], z[p] ); \ + bli_taxpys( ch,ch,ch,ch, a[p + i*lda], w[p], r[i] ); \ + bli_taxpyjs( ch,ch,ch,ch, ax[i], a[p + i*lda], z[p] ); \ } \ } \ } \ @@ -143,8 +143,8 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ - PASTEMAC(ch,axpyjs)( a[p + i*lda], w[p], r[i] ); \ - PASTEMAC(ch,axpys)( ax[i], a[p + i*lda], z[p] ); \ + bli_taxpyjs( ch,ch,ch,ch, a[p + i*lda], w[p], r[i] ); \ + bli_taxpys( ch,ch,ch,ch, ax[i], a[p + i*lda], z[p] ); \ } \ } \ else \ @@ -153,18 +153,18 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; 
++i ) \ { \ - PASTEMAC(ch,axpyjs)( a[p + i*lda], w[p], r[i] ); \ - PASTEMAC(ch,axpyjs)( ax[i], a[p + i*lda], z[p] ); \ + bli_taxpyjs( ch,ch,ch,ch, a[p + i*lda], w[p], r[i] ); \ + bli_taxpyjs( ch,ch,ch,ch, ax[i], a[p + i*lda], z[p] ); \ } \ } \ } \ \ if ( bli_is_conj( conjat ) ) \ - for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,conjs)( r[i] ); \ + for ( dim_t i = 0; i < ff; ++i ) bli_tconjs( ch, r[i] ); \ \ for ( dim_t i = 0; i < ff; ++i ) \ { \ - PASTEMAC(ch,axpys)( *alpha, r[i], y[i] ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, r[i], y[i] ); \ } \ } \ else \ diff --git a/ref_kernels/1f/bli_dotxf_ref.c b/ref_kernels/1f/bli_dotxf_ref.c index 0f4cda2b87..a3545c97ea 100644 --- a/ref_kernels/1f/bli_dotxf_ref.c +++ b/ref_kernels/1f/bli_dotxf_ref.c @@ -63,20 +63,20 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype r[ ff ]; \ \ /* If beta is zero, clear y. Otherwise, scale by beta. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ - for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,set0s)( y[i] ); \ + for ( dim_t i = 0; i < ff; ++i ) bli_tset0s( ch, y[i] ); \ } \ else \ { \ - for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,scals)( *beta, y[i] ); \ + for ( dim_t i = 0; i < ff; ++i ) bli_tscals( ch,ch,ch, *beta, y[i] ); \ } \ \ /* If the vectors are empty or if alpha is zero, return early. */ \ - if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ + if ( bli_zero_dim1( m ) || bli_teq0s( ch, *alpha ) ) return; \ \ /* Initialize r vector to 0. 
*/ \ - for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,set0s)( r[i] ); \ + for ( dim_t i = 0; i < ff; ++i ) bli_tset0s( ch, r[i] ); \ \ /* If a must be conjugated, we do so indirectly by first toggling the effective conjugation of x and then conjugating the resulting dot @@ -92,7 +92,7 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ - PASTEMAC(ch,axpys)( a[p + i*lda], x[p], r[i] ); \ + bli_taxpys( ch,ch,ch,ch, a[p + i*lda], x[p], r[i] ); \ } \ } \ else \ @@ -101,16 +101,16 @@ void PASTEMAC(ch,opname,arch,suf) \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ - PASTEMAC(ch,axpyjs)( a[p + i*lda], x[p], r[i] ); \ + bli_taxpyjs( ch,ch,ch,ch, a[p + i*lda], x[p], r[i] ); \ } \ } \ \ if ( bli_is_conj( conjat ) ) \ - for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,conjs)( r[i] ); \ + for ( dim_t i = 0; i < ff; ++i ) bli_tconjs( ch, r[i] ); \ \ for ( dim_t i = 0; i < ff; ++i ) \ { \ - PASTEMAC(ch,axpys)( *alpha, r[i], y[i] ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, r[i], y[i] ); \ } \ } \ else \ diff --git a/ref_kernels/1m/bli_packm_cxc_diag_1er_ref.c b/ref_kernels/1m/bli_packm_cxc_diag_1er_ref.c index fad987c4bf..ec52e5271f 100644 --- a/ref_kernels/1m/bli_packm_cxc_diag_1er_ref.c +++ b/ref_kernels/1m/bli_packm_cxc_diag_1er_ref.c @@ -37,43 +37,45 @@ #define PACKM_SET_1E( chp_r, val_r, val_i, mnk ) \ do { \ - PASTEMAC(chp_r,copys)( val_r, *(pi1_ri + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2) ); \ - PASTEMAC(chp_r,copys)( val_i, *(pi1_ri + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ - PASTEMAC(chp_r,copys)( -val_i, *(pi1_ir + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2) ); \ - PASTEMAC(chp_r,copys)( val_r, *(pi1_ir + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ + bli_tcopys( chp_r,chp_r, val_r, *(pi1_ri + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2) ); \ + bli_tcopys( chp_r,chp_r, val_i, *(pi1_ri + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ + bli_tcopys( chp_r,chp_r, -val_i, *(pi1_ir + (mnk*2 + 0)*cdim_bcast + d 
+ mnk*ldp2) ); \ + bli_tcopys( chp_r,chp_r, val_r, *(pi1_ir + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ } while (0) #define PACKM_SET_1R( chp_r, val_r, val_i, mnk ) \ do { \ - PASTEMAC(chp_r,copys)( val_r, *(pi1_r + mnk*cdim_bcast + d + mnk*ldp2) ); \ - PASTEMAC(chp_r,copys)( val_i, *(pi1_i + mnk*cdim_bcast + d + mnk*ldp2) ); \ + bli_tcopys( chp_r,chp_r, val_r, *(pi1_r + mnk*cdim_bcast + d + mnk*ldp2) ); \ + bli_tcopys( chp_r,chp_r, val_i, *(pi1_i + mnk*cdim_bcast + d + mnk*ldp2) ); \ } while (0) #define PACKM_SCAL_1E( ctypep_r, cha, chp, mn, k, op ) \ do { \ - ctypep_r alpha_r, alpha_i, ka_r, ka_i; \ - PASTEMAC(cha,chp,copyris)( *(alpha1 + mn *inca2 + 0 + k*lda2), \ - *(alpha1 + mn *inca2 + 1 + k*lda2), \ - alpha_r, alpha_i ); \ - PASTEMAC(chp,op)( kappa_r, kappa_i, alpha_r, alpha_i, ka_r, ka_i ); \ - PASTEMAC(chp,copyris)( ka_r, ka_i, *(pi1_ri + (mn*2 + 0)*cdim_bcast + d + k*ldp2), \ - *(pi1_ri + (mn*2 + 1)*cdim_bcast + d + k*ldp2) ); \ - PASTEMAC(chp,copyris)( -ka_i, ka_r, *(pi1_ir + (mn*2 + 0)*cdim_bcast + d + k*ldp2), \ - *(pi1_ir + (mn*2 + 1)*cdim_bcast + d + k*ldp2) ); \ + ctypep_r ka_r, ka_i; \ + PASTEMAC(t,op)( chp,cha,chp,chp, \ + kappa_r, kappa_i, \ + *(alpha1 + mn*inca2 + 0 + k*lda2), \ + *(alpha1 + mn*inca2 + 1 + k*lda2), \ + ka_r, ka_i ); \ + bli_tcopyris( chp,chp, ka_r, ka_i, *(pi1_ri + (mn*2 + 0)*cdim_bcast + d + k*ldp2), \ + *(pi1_ri + (mn*2 + 1)*cdim_bcast + d + k*ldp2) ); \ + bli_tcopyris( chp,chp, -ka_i, ka_r, *(pi1_ir + (mn*2 + 0)*cdim_bcast + d + k*ldp2), \ + *(pi1_ir + (mn*2 + 1)*cdim_bcast + d + k*ldp2) ); \ } while (0) #define PACKM_SCAL_1R( ctypep_r, cha, chp, mn, k, op ) \ do { \ - ctypep_r alpha_r, alpha_i, ka_r, ka_i; \ - PASTEMAC(cha,chp,copyris)( *(alpha1 + mn *inca2 + 0 + k*lda2), \ - *(alpha1 + mn *inca2 + 1 + k*lda2), \ - alpha_r, alpha_i ); \ - PASTEMAC(chp,op)( kappa_r, kappa_i, alpha_r, alpha_i, ka_r, ka_i ); \ - PASTEMAC(chp,copyris)( ka_r, ka_i, *(pi1_r + mn*cdim_bcast + d + k*ldp2), \ - *(pi1_i + mn*cdim_bcast + d + k*ldp2) 
); \ + ctypep_r ka_r, ka_i; \ + PASTEMAC(t,op)( chp,cha,chp,chp, \ + kappa_r, kappa_i, \ + *(alpha1 + mn*inca2 + 0 + k*lda2), \ + *(alpha1 + mn*inca2 + 1 + k*lda2), \ + ka_r, ka_i ); \ + bli_tcopyris( chp,chp, ka_r, ka_i, *(pi1_r + mn*cdim_bcast + d + k*ldp2), \ + *(pi1_i + mn*cdim_bcast + d + k*ldp2) ); \ } while (0) @@ -154,8 +156,9 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ if ( bli_is_1e_packed( schema ) ) \ { \ /* start by zeroing out the whole block */ \ - PASTEMAC(chp_r,set0s_mxn) \ + bli_tset0s_mxn \ ( \ + chp_r, \ 2*cdim_max, \ 2*n_max, \ ( ctypep_r* )p, 1, ldp \ @@ -214,11 +217,11 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ { \ ctypep_r alpha_r; \ - PASTEMAC(cha_r,chp_r,copys)( *(alpha1 + mnk*(inca2 + lda2)), alpha_r ); \ - PASTEMAC(chp_r,scal2s)( kappa_r, alpha_r, *(pi1_ri + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2) ); \ - PASTEMAC(chp_r,scal2s)( kappa_i, alpha_r, *(pi1_ri + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ - PASTEMAC(chp_r,scal2s)( -kappa_i, alpha_r, *(pi1_ir + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2) ); \ - PASTEMAC(chp_r,scal2s)( kappa_r, alpha_r, *(pi1_ir + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ + bli_tcopys( cha_r,chp_r, *(alpha1 + mnk*(inca2 + lda2)), alpha_r ); \ + bli_tscal2s( chp_r,chp_r,chp_r,chp_r, kappa_r, alpha_r, *(pi1_ri + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2) ); \ + bli_tscal2s( chp_r,chp_r,chp_r,chp_r, kappa_i, alpha_r, *(pi1_ri + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ + bli_tscal2s( chp_r,chp_r,chp_r,chp_r, -kappa_i, alpha_r, *(pi1_ir + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2) ); \ + bli_tscal2s( chp_r,chp_r,chp_r,chp_r, kappa_r, alpha_r, *(pi1_ir + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ } \ } \ else if ( bli_is_conj( conja )) \ @@ -240,9 +243,9 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ for ( dim_t mnk = 0; mnk < cdim; ++mnk ) \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ { \ - PASTEMAC(chp,invertris)( *(pi1_ri + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2), \ + bli_tinvertris( 
chp,chp, *(pi1_ri + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2), \ *(pi1_ri + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2) ); \ - PASTEMAC(chp,copyjris)( *(pi1_ri + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2), \ + bli_tcopyjris( chp,chp, *(pi1_ri + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2), \ *(pi1_ri + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2), \ *(pi1_ir + (mnk*2 + 1)*cdim_bcast + d + mnk*ldp2), \ *(pi1_ir + (mnk*2 + 0)*cdim_bcast + d + mnk*ldp2) ); \ @@ -257,8 +260,9 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ else /* bli_is_1r_packed( schema ) */ \ { \ /* start by zeroing out the whole block */ \ - PASTEMAC(chp_r,set0s_mxn) \ + bli_tset0s_mxn \ ( \ + chp_r, \ cdim_max, \ 2*n_max, \ ( ctypep_r* )p, 1, ldp \ @@ -317,9 +321,9 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ { \ ctypep_r alpha_r; \ - PASTEMAC(cha_r,chp_r,copys)( *(alpha1 + mnk*(inca2 + lda2)), alpha_r ); \ - PASTEMAC(chp_r,scal2s)( kappa_r, alpha_r, *(pi1_r + mnk*(cdim_bcast + ldp2) + d) ); \ - PASTEMAC(chp_r,scal2s)( kappa_i, alpha_r, *(pi1_i + mnk*(cdim_bcast + ldp2) + d) ); \ + bli_tcopys( cha_r,chp_r, *(alpha1 + mnk*(inca2 + lda2)), alpha_r ); \ + bli_tscal2s( chp_r,chp_r,chp_r,chp_r, kappa_r, alpha_r, *(pi1_r + mnk*(cdim_bcast + ldp2) + d) ); \ + bli_tscal2s( chp_r,chp_r,chp_r,chp_r, kappa_i, alpha_r, *(pi1_i + mnk*(cdim_bcast + ldp2) + d) ); \ } \ } \ else if ( bli_is_conj( conja ) ) \ @@ -340,7 +344,7 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ { \ for ( dim_t mnk = 0; mnk < cdim; ++mnk ) \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ - PASTEMAC(chp,invertris)( *(pi1_r + mnk*(cdim_bcast + ldp2) + d), \ + bli_tinvertris( chp,chp, *(pi1_r + mnk*(cdim_bcast + ldp2) + d), \ *(pi1_i + mnk*(cdim_bcast + ldp2) + d) ); \ } \ \ diff --git a/ref_kernels/1m/bli_packm_cxc_diag_ref.c b/ref_kernels/1m/bli_packm_cxc_diag_ref.c index 635bb9900b..82976ebde0 100644 --- a/ref_kernels/1m/bli_packm_cxc_diag_ref.c +++ b/ref_kernels/1m/bli_packm_cxc_diag_ref.c @@ -42,11 +42,10 @@ do \ for ( dim_t k = 
0; k < cdim; k++ ) \ for ( dim_t mn = mn_min; mn < mn_max; mn++ ) \ { \ - ctypep alpha_cast, kappa_alpha; \ - PASTEMAC(cha,chp,copys)( *(alpha1 + mn*inca + k*lda), alpha_cast ); \ - PASTEMAC(chp,op)( kappa_cast, alpha_cast, kappa_alpha ); \ + ctypep kappa_alpha; \ + PASTEMAC(t,op)( chp,cha,chp,chp, kappa_cast, *(alpha1 + mn*inca + k*lda), kappa_alpha ); \ for ( dim_t d = 0; d < dfac; d++ ) \ - PASTEMAC(chp,copys)( kappa_alpha, *(pi1 + mn*dfac + d + k*ldp) ); \ + bli_tcopys( chp,chp, kappa_alpha, *(pi1 + mn*dfac + d + k*ldp) ); \ } \ } while(0) @@ -81,11 +80,12 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ ) \ { \ /* start by zeroing out the whole block */ \ - PASTEMAC(chp,set0s_mxn) \ + bli_tset0s_mxn \ ( \ + chp, \ cdim_max, \ n_max, \ - p, 1, ldp \ + ( ctypep* )p, 1, ldp \ ); \ \ ctypep kappa_cast = *( ctypep* )kappa; \ @@ -134,40 +134,38 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ { \ for ( dim_t mnk = 0; mnk < cdim; ++mnk ) \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ - PASTEMAC(chp,copys)( kappa_cast, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ + bli_tcopys( chp,chp, kappa_cast, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ } \ else if ( bli_is_hermitian( struca ) ) \ { \ for ( dim_t mnk = 0; mnk < cdim; ++mnk ) \ { \ ctypep alpha_cast, kappa_alpha; \ - PASTEMAC(cha,chp,copys)( *(alpha1 + mnk*(inca + lda)), alpha_cast ); \ - PASTEMAC(chp,seti0s)( alpha_cast ); \ - PASTEMAC(chp,scal2s)( kappa_cast, alpha_cast, kappa_alpha ); \ + bli_tcopys( cha,chp, *(alpha1 + mnk*(inca + lda)), alpha_cast ); \ + bli_tseti0s( chp, alpha_cast ); \ + bli_tscal2s( chp,chp,chp,chp, kappa_cast, alpha_cast, kappa_alpha ); \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ - PASTEMAC(chp,copys)( kappa_alpha, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ + bli_tcopys( chp,chp, kappa_alpha, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ } \ } \ else if ( bli_is_conj( conja )) \ { \ for ( dim_t mnk = 0; mnk < cdim; ++mnk ) \ { \ - ctypep alpha_cast, kappa_alpha; \ - PASTEMAC(cha,chp,copys)( *(alpha1 + mnk*(inca + 
lda)), alpha_cast ); \ - PASTEMAC(chp,scal2js)( kappa_cast, alpha_cast, kappa_alpha ); \ + ctypep kappa_alpha; \ + bli_tscal2js( chp,cha,chp,chp, kappa_cast, *(alpha1 + mnk*(inca + lda)), kappa_alpha ); \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ - PASTEMAC(chp,copys)( kappa_alpha, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ + bli_tcopys( chp,chp, kappa_alpha, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ } \ } \ else \ { \ for ( dim_t mnk = 0; mnk < cdim; ++mnk ) \ { \ - ctypep alpha_cast, kappa_alpha; \ - PASTEMAC(cha,chp,copys)( *(alpha1 + mnk*(inca + lda)), alpha_cast ); \ - PASTEMAC(chp,scal2s)( kappa_cast, alpha_cast, kappa_alpha ); \ + ctypep kappa_alpha; \ + bli_tscal2s( chp,cha,chp,chp, kappa_cast, *(alpha1 + mnk*(inca + lda)), kappa_alpha ); \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ - PASTEMAC(chp,copys)( kappa_alpha, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ + bli_tcopys( chp,chp, kappa_alpha, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ } \ } \ \ @@ -176,13 +174,13 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ { \ for ( dim_t mnk = 0; mnk < cdim; ++mnk ) \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ - PASTEMAC(chp,inverts)( *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ + bli_tinverts( chp,chp, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ } \ \ /* if this an edge case in both directions, extend the diagonal with ones */ \ for ( dim_t mnk = cdim; mnk < bli_min( cdim_max, n_max ); ++mnk ) \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ - PASTEMAC(chp,set1s)( *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ + bli_tset1s( chp, *(pi1 + mnk*(cdim_bcast + ldp) + d) ); \ } INSERT_GENTFUNC2_BASIC( packm_diag, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) diff --git a/ref_kernels/1m/bli_packm_cxc_diag_ro_ref.c b/ref_kernels/1m/bli_packm_cxc_diag_ro_ref.c index bb6fe939e6..a520158fcb 100644 --- a/ref_kernels/1m/bli_packm_cxc_diag_ro_ref.c +++ b/ref_kernels/1m/bli_packm_cxc_diag_ro_ref.c @@ -37,18 +37,19 @@ #define PACKM_SET_RO( chp_r, val, mnk ) \ do { \ - PASTEMAC(chp_r,copys)( val, *(pi1_r + 
mnk*cdim_bcast + d + mnk*ldp) ); \ + bli_tcopys( chp_r,chp_r, val, *(pi1_r + mnk*cdim_bcast + d + mnk*ldp) ); \ } while (0) #define PACKM_SCAL_RO( ctypep_r, cha, chp, chp_r, mn, k, op ) \ do { \ - ctypep_r alpha_r, alpha_i, ka_r, ka_i; (void)ka_i; \ - PASTEMAC(cha,chp,copyris)( *(alpha1 + mn *inca2 + 0 + k*lda2), \ - *(alpha1 + mn *inca2 + 1 + k*lda2), \ - alpha_r, alpha_i ); \ - PASTEMAC(chp,op)( kappa_r, kappa_i, alpha_r, alpha_i, ka_r, ka_i ); \ - PASTEMAC(chp_r,copys)( ka_r, *(pi1_r + mn*cdim_bcast + d + k*ldp) ); \ + ctypep_r ka_r, ka_i; (void)ka_i; \ + PASTEMAC(t,op)( chp,cha,chp,chp, \ + kappa_r, kappa_i, \ + *(alpha1 + mn*inca2 + 0 + k*lda2), \ + *(alpha1 + mn*inca2 + 1 + k*lda2), \ + ka_r, ka_i ); \ + bli_tcopys( chp_r,chp_r, ka_r, *(pi1_r + mn*cdim_bcast + d + k*ldp) ); \ } while (0) @@ -104,8 +105,9 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ const ctypea_r* restrict alpha1 = ( const ctypea_r* )a; \ \ /* start by zeroing out the whole block */ \ - PASTEMAC(chp_r,set0s_mxn) \ + bli_tset0s_mxn \ ( \ + chp_r, \ cdim_max, \ n_max, \ ( ctypep_r* )p, 1, ldp \ @@ -161,11 +163,10 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ { \ for ( dim_t mnk = 0; mnk < cdim; ++mnk ) \ for ( dim_t d = 0; d < cdim_bcast; ++d ) \ - { \ - ctypep_r alpha_r; \ - PASTEMAC(cha_r,chp_r,copys)( *(alpha1 + mnk*(inca2 + lda2)), alpha_r ); \ - PASTEMAC(chp_r,scal2s)( kappa_r, alpha_r, *(pi1_r + mnk*(cdim_bcast + ldp) + d) ); \ - } \ + bli_tscal2s( chp_r,cha_r,chp_r,chp_r, \ + kappa_r, \ + *(alpha1 + mnk*(inca2 + lda2)), \ + *(pi1_r + mnk*(cdim_bcast + ldp) + d) ); \ } \ else if ( bli_is_conj( conja ) ) \ { \ diff --git a/ref_kernels/1m/bli_packm_cxk_1er_ref.c b/ref_kernels/1m/bli_packm_cxk_1er_ref.c index 5115628cd4..e4350384d6 100644 --- a/ref_kernels/1m/bli_packm_cxk_1er_ref.c +++ b/ref_kernels/1m/bli_packm_cxk_1er_ref.c @@ -44,13 +44,15 @@ do \ pragma \ for ( dim_t mn = 0; mn < cdim; ++mn ) \ { \ - ctypep_r alpha_r, alpha_i, ka_r, ka_i; \ - PASTEMAC(cha,chp,copyris)( *(alpha1 + mn*inca2 
+ 0), *(alpha1 + mn*inca2 + 1), alpha_r, alpha_i ); \ - PASTEMAC(chp,op)( kappa_r, kappa_i, alpha_r, alpha_i, ka_r, ka_i ); \ + ctypep_r ka_r, ka_i; \ + PASTEMAC(t,op)( chp,cha,chp,chp, \ + kappa_r, kappa_i, \ + *(alpha1 + mn*inca2 + 0), *(alpha1 + mn*inca2 + 1), \ + ka_r, ka_i ); \ for ( dim_t d = 0; d < dfac; ++d ) \ { \ - PASTEMAC(chp,copyris)( ka_r, ka_i, *(pi1_ri + (mn*2 + 0)*dfac + d), *(pi1_ri + (mn*2 + 1)*dfac + d) ); \ - PASTEMAC(chp,copyris)( -ka_i, ka_r, *(pi1_ir + (mn*2 + 0)*dfac + d), *(pi1_ir + (mn*2 + 1)*dfac + d) ); \ + bli_tcopyris( chp,chp, ka_r, ka_i, *(pi1_ri + (mn*2 + 0)*dfac + d), *(pi1_ri + (mn*2 + 1)*dfac + d) ); \ + bli_tcopyris( chp,chp, -ka_i, ka_r, *(pi1_ir + (mn*2 + 0)*dfac + d), *(pi1_ir + (mn*2 + 1)*dfac + d) ); \ } \ } \ \ @@ -70,11 +72,13 @@ do \ pragma \ for ( dim_t mn = 0; mn < cdim; ++mn ) \ { \ - ctypep_r alpha_r, alpha_i, ka_r, ka_i; \ - PASTEMAC(cha,chp,copyris)( *(alpha1 + mn*inca2 + 0), *(alpha1 + mn*inca2 + 1), alpha_r, alpha_i ); \ - PASTEMAC(chp,op)( kappa_r, kappa_i, alpha_r, alpha_i, ka_r, ka_i ); \ + ctypep_r ka_r, ka_i; \ + PASTEMAC(t,op)( chp,cha,chp,chp, \ + kappa_r, kappa_i, \ + *(alpha1 + mn*inca2 + 0), *(alpha1 + mn*inca2 + 1), \ + ka_r, ka_i ); \ for ( dim_t d = 0; d < dfac; ++d ) \ - PASTEMAC(chp,copyris)( ka_r, ka_i, *(pi1_r + mn*dfac + d), *(pi1_i + mn*dfac + d) ); \ + bli_tcopyris( chp,chp, ka_r, ka_i, *(pi1_r + mn*dfac + d), *(pi1_i + mn*dfac + d) ); \ } \ \ alpha1 += lda2; \ @@ -153,8 +157,9 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ else PACKM_1E_BODY( ctypep_r, cha, chp, , cdim, cdim_bcast, inca2, scal2ris ); \ } \ \ - PASTEMAC(chp_r,set0s_edge) \ + bli_tset0s_edge \ ( \ + chp_r, \ cdim2*cdim_bcast, 2*cdim_max*cdim_bcast, \ 2*n, 2*n_max, \ ( ctypep_r* )p, ldp \ @@ -204,8 +209,9 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ else PACKM_1R_BODY( ctypep_r, cha, chp, , cdim, cdim_bcast, inca2, scal2ris ); \ } \ \ - PASTEMAC(chp_r,set0s_edge) \ + bli_tset0s_edge \ ( \ + chp_r, \ cdim*cdim_bcast, 
cdim_max*cdim_bcast, \ 2*n, 2*n_max, \ ( ctypep_r* )p, ldp \ diff --git a/ref_kernels/1m/bli_packm_cxk_ref.c b/ref_kernels/1m/bli_packm_cxk_ref.c index 5cca515aed..11b692af35 100644 --- a/ref_kernels/1m/bli_packm_cxk_ref.c +++ b/ref_kernels/1m/bli_packm_cxk_ref.c @@ -35,7 +35,7 @@ #include "blis.h" -#define PACKM_BODY( ctypea, ctypep, cha, chp, pragma, cdim, dfac, inca, op ) \ +#define PACKM_BODY_r( ctypea, ctypep, cha, chp, pragma, cdim, dfac, inca, op ) \ \ do \ { \ @@ -44,11 +44,10 @@ do \ pragma \ for ( dim_t mn = 0; mn < cdim; mn++ ) \ { \ - ctypep alpha_cast, kappa_alpha; \ - PASTEMAC(cha,chp,copys)( *(alpha1 + mn*inca), alpha_cast ); \ - PASTEMAC(chp,op)( kappa_cast, alpha_cast, kappa_alpha ); \ + ctypep kappa_alpha; \ + PASTEMAC(t,op)( chp,cha,chp,chp, kappa_cast, *(alpha1 + mn*inca), kappa_alpha ); \ for ( dim_t d = 0; d < dfac; d++ ) \ - PASTEMAC(chp,copys)( kappa_alpha, *(pi1 + mn*dfac + d) ); \ + bli_tcopys( chp,chp, kappa_alpha, *(pi1 + mn*dfac + d) ); \ } \ \ alpha1 += lda; \ @@ -57,6 +56,42 @@ do \ } while(0) +#define PACKM_BODY_c_( ctypea, ctypep, ctypep_r, cha, chp, chp_r, pragma, cdim, dfac, inca, op ) \ +\ +do \ +{ \ + for ( dim_t k = n; k != 0; --k ) \ + { \ + pragma \ + for ( dim_t mn = 0; mn < cdim; mn++ ) \ + { \ + ctypep kappa_alpha; \ + PASTEMAC(t,op)( chp,cha,chp,chp, kappa_cast, *(alpha1 + mn*inca), kappa_alpha ); \ + ctypep_r kar, kai; \ + bli_tgets( chp,chp, kappa_alpha, kar, kai ); \ + ctypep_r* pi1r = (ctypep_r*)pi1; \ + ctypep_r* pi1i = (ctypep_r*)pi1 + dfac; \ + for ( dim_t d = 0; d < dfac; d++ ) \ + { \ + bli_tcopys( chp_r,chp_r, kar, *(pi1r + mn*dfac*2 + d) ); \ + bli_tcopys( chp_r,chp_r, kai, *(pi1i + mn*dfac*2 + d) ); \ + } \ + } \ +\ + alpha1 += lda; \ + pi1 += ldp; \ + } \ +} while(0) + + +#define PACKM_BODY_c( ctypea, ctypep, cha, chp, pragma, cdim, dfac, inca, op ) \ +PACKM_BODY_c_( ctypea, ctypep, PASTEMAC(chp,ctyper), cha, chp, PASTEMAC(chp,prec), pragma, cdim, dfac, inca, op ) + + +#define PACKM_BODY( ctypea, ctypep, cha, 
chp, pragma, cdim, dfac, inca, op ) \ +PASTECH(PACKM_BODY_,PASTEMAC(chp,dom))( ctypea, ctypep, cha, chp, pragma, cdim, dfac, inca, op ) + + #undef GENTFUNC2 #define GENTFUNC2( ctypea, ctypep, cha, chp, opname, arch, suf ) \ \ @@ -117,11 +152,12 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ else PACKM_BODY( ctypea, ctypep, cha, chp, , cdim, cdim_bcast, inca, scal2s ); \ } \ \ - PASTEMAC(chp,set0s_edge) \ + bli_tset0s_edge \ ( \ + chp, \ cdim*cdim_bcast, cdim_max*cdim_bcast, \ n, n_max, \ - p, ldp \ + ( ctypep* )p, ldp \ ); \ } diff --git a/ref_kernels/1m/bli_packm_cxk_ro_ref.c b/ref_kernels/1m/bli_packm_cxk_ro_ref.c index a8165351d9..549e18d2e9 100644 --- a/ref_kernels/1m/bli_packm_cxk_ro_ref.c +++ b/ref_kernels/1m/bli_packm_cxk_ro_ref.c @@ -44,12 +44,14 @@ do \ pragma \ for ( dim_t mn = 0; mn < cdim; ++mn ) \ { \ - ctypep_r alpha_r, alpha_i, ka_r, ka_i; \ + ctypep_r ka_r, ka_i; \ ( void )ka_i; \ - PASTEMAC(cha,chp,copyris)( *(alpha1 + mn*inca2 + 0), *(alpha1 + mn*inca2 + 1), alpha_r, alpha_i ); \ - PASTEMAC(chp,op)( kappa_r, kappa_i, alpha_r, alpha_i, ka_r, ka_i ); \ + PASTEMAC(t,op)( chp,cha,chp,chp, \ + kappa_r, kappa_i, \ + *(alpha1 + mn*inca2 + 0), *(alpha1 + mn*inca2 + 1), \ + ka_r, ka_i ); \ for ( dim_t d = 0; d < dfac; ++d ) \ - PASTEMAC(chp_r,copys)( ka_r, *(pi1_r + mn*dfac + d) ); \ + bli_tcopys( chp_r,chp_r, ka_r, *(pi1_r + mn*dfac + d) ); \ } \ \ alpha1 += lda2; \ @@ -122,8 +124,9 @@ void PASTEMAC(cha,chp,opname,arch,suf) \ else PACKM_RO_BODY( ctypep_r, cha, chp, chp_r, , cdim, cdim_bcast, inca2, scal2ris ); \ } \ \ - PASTEMAC(chp_r,set0s_edge) \ + bli_tset0s_edge \ ( \ + chp_r, \ cdim*cdim_bcast, cdim_max*cdim_bcast, \ n, n_max, \ ( ctypep_r* )p, ldp \ diff --git a/ref_kernels/1m/bli_unpackm_cxk_ref.c b/ref_kernels/1m/bli_unpackm_cxk_ref.c index 071f5c4abd..2cf7149e35 100644 --- a/ref_kernels/1m/bli_unpackm_cxk_ref.c +++ b/ref_kernels/1m/bli_unpackm_cxk_ref.c @@ -43,11 +43,7 @@ do \ { \ pragma \ for ( dim_t mn = 0; mn < cdim; mn++ ) \ - { \ - ctypep 
kappa_pi; \ - PASTEMAC(chp,op)( *kappa_cast, *(pi1 + mn*dfac), kappa_pi ); \ - PASTEMAC(chp,cha,copys)( kappa_pi, *(alpha1 + mn*inca) ); \ - } \ + PASTEMAC(t,op)( chp,chp,cha,chp, *kappa_cast, *(pi1 + mn*dfac), *(alpha1 + mn*inca) ); \ \ alpha1 += lda; \ pi1 += ldp; \ diff --git a/ref_kernels/3/bli_gemm_ref.c b/ref_kernels/3/bli_gemm_ref.c index ab861bcb56..06e7f26324 100644 --- a/ref_kernels/3/bli_gemm_ref.c +++ b/ref_kernels/3/bli_gemm_ref.c @@ -37,8 +37,8 @@ // Completely generic gemm ukr implementation which checks MR/NR at // runtime. Very slow, but has to be used in certain cases. -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, arch, suf ) \ +#undef GENTFUNCR +#define GENTFUNCR( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ static void PASTEMAC(ch,ch,opname,arch,suf) \ ( \ @@ -80,7 +80,7 @@ static void PASTEMAC(ch,ch,opname,arch,suf) \ /* Initialize the accumulator elements in ab to zero. */ \ for ( dim_t i = 0; i < m * n; ++i ) \ { \ - PASTEMAC(ch,set0s)( *(ab + i) ); \ + bli_tset0s( ch, *(ab + i) ); \ } \ \ /* Perform a series of k rank-1 updates into ab. */ \ @@ -92,13 +92,19 @@ static void PASTEMAC(ch,ch,opname,arch,suf) \ are typically fully unrolled. */ \ for ( dim_t j = 0; j < n; ++j ) \ { \ - ctype bj = *(b + j*cs_b); \ + ctype bj; \ + const ctype_r* b_r = (const ctype_r*)(b + j*cs_b); \ + const ctype_r* b_i = b_r + cs_b; (void)b_i; \ + bli_tsets( ch,ch, *b_r, *b_i, bj ); \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ - ctype ai = *(a + i*rs_a); \ + ctype ai; \ + const ctype_r* a_r = (const ctype_r*)(a + i*rs_a); \ + const ctype_r* a_i = a_r + rs_a; (void)a_i; \ + bli_tsets( ch,ch, *a_r, *a_i, ai ); \ \ - PASTEMAC(ch,dots)( ai, bj, *abij ); \ + bli_tdots( ch,ch,ch,ch, ai, bj, *abij ); \ \ abij += rs_ab; \ } \ @@ -111,15 +117,17 @@ static void PASTEMAC(ch,ch,opname,arch,suf) \ /* Scale the result in ab by alpha. 
*/ \ for ( dim_t i = 0; i < m * n; ++i ) \ { \ - PASTEMAC(ch,scals)( *alpha, *(ab + i) ); \ + bli_tscals( ch,ch,ch, *alpha, *(ab + i) ); \ } \ \ /* If beta is zero, overwrite c with the scaled result in ab. Otherwise, scale by beta and then add the scaled redult in ab. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,copys_mxn) \ + bli_tcopys_mxn \ ( \ + ch, \ + ch, \ m, \ n, \ ab, rs_ab, cs_ab, \ @@ -128,8 +136,12 @@ static void PASTEMAC(ch,ch,opname,arch,suf) \ } \ else \ { \ - PASTEMAC(ch,xpbys_mxn) \ + bli_txpbys_mxn \ ( \ + ch, \ + ch, \ + ch, \ + ch, \ m, \ n, \ ab, rs_ab, cs_ab, \ @@ -139,7 +151,7 @@ static void PASTEMAC(ch,ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNC_BASIC( gemm_gen, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCR_BASIC( gemm_gen, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) // An implementation that attempts to facilitate emission of vectorized // instructions via constant loop bounds + #pragma omp simd directives. @@ -171,12 +183,17 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ \ const dim_t mr = PASTECH(BLIS_MR_,ch); \ const dim_t nr = PASTECH(BLIS_NR_,ch); \ +\ + const inc_t rs_a = PASTECH(BLIS_BBM_,ch); \ + const inc_t cs_a = PASTECH(BLIS_PACKMR_,ch); \ + const inc_t rs_b = PASTECH(BLIS_PACKNR_,ch); \ + const inc_t cs_b = PASTECH(BLIS_BBN_,ch); \ \ /* If either BLIS_MR_? or BLIS_NR_? was left undefined by the subconfig, the compiler can't fully unroll the MR and NR loop iterations below, which means there's no benefit to using this kernel over a general- purpose implementation instead. 
*/ \ - if ( mr == -1 || nr == -1 ) \ + if ( mr == -1 || nr == -1 || rs_a != 1 || cs_b != 1 ) \ { \ PASTEMAC(ch,ch,gemm_gen,arch,suf) \ ( \ @@ -199,18 +216,13 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const inc_t rs_ab = nr; \ const inc_t cs_ab = 1; \ -\ - const inc_t rs_a = PASTECH(BLIS_BBM_,ch); \ - const inc_t cs_a = PASTECH(BLIS_PACKMR_,ch); \ - const inc_t rs_b = PASTECH(BLIS_PACKNR_,ch); \ - const inc_t cs_b = PASTECH(BLIS_BBN_,ch); \ \ \ /* Initialize the accumulator elements in ab to zero. */ \ PRAGMA_SIMD \ for ( dim_t i = 0; i < mr * nr; ++i ) \ { \ - PASTEMAC(ch,set0s)( ab[ i ] ); \ + bli_tset0s( ch, ab[ i ] ); \ } \ \ /* Perform a series of k rank-1 updates into ab. */ \ @@ -221,8 +233,9 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t j = 0; j < nr; ++j ) \ { \ - PASTEMAC(ch,dots) \ + bli_tdots \ ( \ + ch,ch,ch,ch, \ a[ i*rs_a ], \ b[ j*cs_b ], \ ab[ i*rs_ab + j*cs_ab ] \ @@ -238,7 +251,7 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ PRAGMA_SIMD \ for ( dim_t i = 0; i < mr * nr; ++i ) \ { \ - PASTEMAC(ch,scals)( *alpha, ab[ i ] ); \ + bli_tscals( ch,ch,ch, *alpha, ab[ i ] ); \ } \ \ /* Output/accumulate intermediate result ab based on the storage @@ -247,12 +260,13 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ { \ /* C is row-stored. */ \ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if ( bli_teq0s( ch, *beta ) ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < n; ++j ) \ - PASTEMAC(ch,copys) \ + bli_tcopys \ ( \ + ch,ch, \ ab[ i*rs_ab + j*cs_ab ], \ c [ i*rs_c + j*1 ] \ ); \ @@ -261,8 +275,9 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ { \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < n; ++j ) \ - PASTEMAC(ch,xpbys) \ + bli_txpbys \ ( \ + ch,ch,ch,ch, \ ab[ i*rs_ab + j*cs_ab ], \ *beta, \ c [ i*rs_c + j*1 ] \ @@ -273,12 +288,13 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ { \ /* C is column-stored or general-stored. 
*/ \ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ + if (bli_teq0s( ch, *beta ) ) \ { \ for ( dim_t j = 0; j < n; ++j ) \ for ( dim_t i = 0; i < m; ++i ) \ - PASTEMAC(ch,copys) \ + bli_tcopys \ ( \ + ch,ch, \ ab[ i*rs_ab + j*cs_ab ], \ c [ i*rs_c + j*cs_c ] \ ); \ @@ -287,8 +303,9 @@ void PASTEMAC(ch,ch,opname,arch,suf) \ { \ for ( dim_t j = 0; j < n; ++j ) \ for ( dim_t i = 0; i < m; ++i ) \ - PASTEMAC(ch,xpbys) \ + bli_txpbys \ ( \ + ch,ch,ch,ch, \ ab[ i*rs_ab + j*cs_ab ], \ *beta, \ c [ i*rs_c + j*cs_c ] \ @@ -355,8 +372,9 @@ void PASTEMAC(chab,chc,opname,arch,suf) \ cntx \ ); \ \ - PASTEMAC(chab,chc,chc,xpbys_mxn) \ + bli_txpbys_mxn \ ( \ + chab,chc,chc,chc, \ m, n, \ ct, rs_ct, cs_ct, \ beta, \ diff --git a/ref_kernels/3/bli_gemmsup_ref.c b/ref_kernels/3/bli_gemmsup_ref.c index 9dab9e0922..7f2fa50dde 100644 --- a/ref_kernels/3/bli_gemmsup_ref.c +++ b/ref_kernels/3/bli_gemmsup_ref.c @@ -80,7 +80,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict bj = &b [ j*cs_b ]; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -88,23 +88,23 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict aij = &ai[ l*cs_a ]; \ const ctype* restrict bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,dots)( *aij, *bij, ab ); \ + bli_tdots( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(ch,eq0)( *beta ) ) \ + else if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ @@ -123,7 +123,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict bj = &b [ j*cs_b ]; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -131,23 +131,23 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict aij = &ai[ l*cs_a ]; \ const ctype* restrict bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,axpyjs)( *aij, *bij, ab ); \ + bli_taxpyjs( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. */ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(ch,eq0)( *beta ) ) \ + else if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ @@ -166,7 +166,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict bj = &b [ j*cs_b ]; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. 
*/ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -174,23 +174,23 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict aij = &ai[ l*cs_a ]; \ const ctype* restrict bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,dotjs)( *aij, *bij, ab ); \ + bli_tdotjs( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. */ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(ch,eq0)( *beta ) ) \ + else if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ @@ -209,7 +209,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict bj = &b [ j*cs_b ]; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -217,26 +217,26 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict aij = &ai[ l*cs_a ]; \ const ctype* restrict bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,dots)( *aij, *bij, ab ); \ + bli_tdots( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* Conjugate the result to simulate conj(a^T) * conj(b). */ \ - PASTEMAC(ch,conjs)( ab ); \ + bli_tconjs( ch, ab ); \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(ch,eq0)( *beta ) ) \ + else if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ @@ -291,7 +291,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict ai = &a [ i*rs_a ]; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -299,23 +299,23 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict aij = &ai[ l*cs_a ]; \ const ctype* restrict bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,dots)( *aij, *bij, ab ); \ + bli_tdots( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. */ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(ch,eq0)( *beta ) ) \ + else if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ @@ -334,7 +334,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict ai = &a [ i*rs_a ]; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. 
*/ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -342,23 +342,23 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict aij = &ai[ l*cs_a ]; \ const ctype* restrict bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,axpyjs)( *aij, *bij, ab ); \ + bli_taxpyjs( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. */ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(ch,eq0)( *beta ) ) \ + else if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ @@ -377,7 +377,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict ai = &a [ i*rs_a ]; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -385,23 +385,23 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict aij = &ai[ l*cs_a ]; \ const ctype* restrict bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,dotjs)( *aij, *bij, ab ); \ + bli_tdotjs( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(ch,eq0)( *beta ) ) \ + else if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ @@ -420,7 +420,7 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict ai = &a [ i*rs_a ]; \ ctype ab; \ \ - PASTEMAC(ch,set0s)( ab ); \ + bli_tset0s( ch, ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -428,26 +428,26 @@ void PASTEMAC(ch,opname,arch,suf) \ const ctype* restrict aij = &ai[ l*cs_a ]; \ const ctype* restrict bij = &bj[ l*rs_b ]; \ \ - PASTEMAC(ch,dots)( *aij, *bij, ab ); \ + bli_tdots( ch,ch,ch,ch, *aij, *bij, ab ); \ } \ \ /* Conjugate the result to simulate conj(a^T) * conj(b). */ \ - PASTEMAC(ch,conjs)( ab ); \ + bli_tconjs( ch, ab ); \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ - if ( PASTEMAC(ch,eq1)( *beta ) ) \ + if ( bli_teq1s( ch, *beta ) ) \ { \ - PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ + bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ - else if ( PASTEMAC(ch,eq0)( *beta ) ) \ + else if ( bli_teq0s( ch, *beta ) ) \ { \ - PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ + bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ } \ else \ { \ - PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ + bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ } \ } \ } \ diff --git a/ref_kernels/3/bli_gemmtrsm_ref.c b/ref_kernels/3/bli_gemmtrsm_ref.c index e1b00e3587..0d496feb76 100644 --- a/ref_kernels/3/bli_gemmtrsm_ref.c +++ b/ref_kernels/3/bli_gemmtrsm_ref.c @@ -138,11 +138,12 @@ PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b11 after gemm", mr, 2*nr, \ \ /* Broadcast the elements of the updated b11 submatrix to their duplicated neighbors. */ \ - PASTEMAC(ch,bcastbbs_mxn) \ + bli_tbcastbbs_mxn \ ( \ - m, \ + ch, \ n, \ - b11, rs_b, cs_b \ + m, \ + b11, cs_b, rs_b \ ); \ \ /* b11 = inv(a11) * b11; @@ -162,8 +163,9 @@ PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b11 after trsm", mr, 2*nr, \ \ if ( use_ct ) \ { \ - PASTEMAC(ch,copys_mxn) \ + bli_tcopys_mxn \ ( \ + ch,ch, \ m, n, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c \ diff --git a/ref_kernels/3/bli_trsm_ref.c b/ref_kernels/3/bli_trsm_ref.c index c5f8333591..cbd64899d9 100644 --- a/ref_kernels/3/bli_trsm_ref.c +++ b/ref_kernels/3/bli_trsm_ref.c @@ -91,29 +91,29 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype rho11; \ \ /* beta11 = beta11 - a10t * b01; */ \ - PASTEMAC(ch,set0s)( rho11 ); \ + bli_tset0s( ch, rho11 ); \ for ( dim_t l = 0; l < n_behind; ++l ) \ { \ const ctype* restrict alpha10 = a10t + (l )*cs_a; \ ctype* restrict beta01 = b01 + (l )*rs_b; \ \ - PASTEMAC(ch,axpys)( *alpha10, *beta01, rho11 ); \ + bli_taxpys( ch,ch,ch,ch, *alpha10, *beta01, rho11 ); \ } \ - PASTEMAC(ch,subs)( rho11, beta11c ); \ + bli_tsubs( ch,ch,ch, rho11, beta11c ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: When 
preinversion is enabled, the INVERSE of alpha11 (1.0/alpha11) is stored during packing instead alpha11 so we can multiply rather than divide. When preinversion is disabled, alpha11 is stored and division happens below explicitly. */ \ - PASTEMAC(ch,scals)( *alpha11, beta11c ); \ + PASTEMAC(t,diagop)( ch,ch,ch, *alpha11, beta11c ); \ \ /* Output final result to matrix c. */ \ - PASTEMAC(ch,copys)( beta11c, *gamma11 ); \ + bli_tcopys( ch,ch, beta11c, *gamma11 ); \ \ /* Store the local value back to b11. */ \ for ( dim_t d = 0; d < cs_b; ++d ) \ - PASTEMAC(ch,copys)( beta11c, *(beta11 + d) ); \ + bli_tcopys( ch,ch, beta11c, *(beta11 + d) ); \ } \ } \ } @@ -179,29 +179,29 @@ void PASTEMAC(ch,opname,arch,suf) \ ctype rho11; \ \ /* beta11 = beta11 - a12t * b21; */ \ - PASTEMAC(ch,set0s)( rho11 ); \ + bli_tset0s( ch, rho11 ); \ for ( dim_t l = 0; l < n_behind; ++l ) \ { \ const ctype* restrict alpha12 = a12t + (l )*cs_a; \ ctype* restrict beta21 = b21 + (l )*rs_b; \ \ - PASTEMAC(ch,axpys)( *alpha12, *beta21, rho11 ); \ + bli_taxpys( ch,ch,ch,ch, *alpha12, *beta21, rho11 ); \ } \ - PASTEMAC(ch,subs)( rho11, beta11c ); \ + bli_tsubs( ch,ch,ch, rho11, beta11c ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: When preinversion is enabled, the INVERSE of alpha11 (1.0/alpha11) is stored during packing instead alpha11 so we can multiply rather than divide. When preinversion is disabled, alpha11 is stored and division happens below explicitly. */ \ - PASTEMAC(ch,diagop)( *alpha11, beta11c ); \ + PASTEMAC(t,diagop)( ch,ch,ch, *alpha11, beta11c ); \ \ /* Output final result to matrix c. */ \ - PASTEMAC(ch,copys)( beta11c, *gamma11 ); \ + bli_tcopys( ch,ch, beta11c, *gamma11 ); \ \ /* Store the local value back to b11. 
*/ \ for ( dim_t d = 0; d < cs_b; ++d ) \ - PASTEMAC(ch,copys)( beta11c, *(beta11 + d) ); \ + bli_tcopys( ch,ch, beta11c, *(beta11 + d) ); \ } \ } \ } diff --git a/ref_kernels/ind/bli_gemm1m_ref.c b/ref_kernels/ind/bli_gemm1m_ref.c index 30904ecdb5..63269c8596 100644 --- a/ref_kernels/ind/bli_gemm1m_ref.c +++ b/ref_kernels/ind/bli_gemm1m_ref.c @@ -90,8 +90,8 @@ void PASTEMAC(chabr,chcr,opname,arch,suf) \ auxinfo_t auxinfo_r = *auxinfo; \ bli_auxinfo_set_params( params_r, &auxinfo_r ); \ \ - if ( !PASTEMAC(chabr,eq0)( *alpha_i ) || \ - !PASTEMAC(chcr,eq0)( *beta_i ) || \ + if ( !bli_teq0s( chabr, *alpha_i ) || \ + !bli_teq0s( chcr, *beta_i ) || \ !bli_is_preferentially_stored( rs_c, cs_c, row_pref ) || \ !PASTEMAC(chabr,chcr,same) ) \ { \ @@ -134,8 +134,9 @@ void PASTEMAC(chabr,chcr,opname,arch,suf) \ cntx \ ); \ \ - PASTEMAC(chab,chab,chc,chc,axpbys_mxn) \ + bli_taxpbys_mxn \ ( \ + chab,chab,chc,chc,chc, \ m, n, \ alpha, \ ct, rs_ct, cs_ct, \ diff --git a/ref_kernels/ind/bli_gemm_ccr_ref.c b/ref_kernels/ind/bli_gemm_ccr_ref.c index f86faebc81..484cf678f2 100644 --- a/ref_kernels/ind/bli_gemm_ccr_ref.c +++ b/ref_kernels/ind/bli_gemm_ccr_ref.c @@ -89,8 +89,8 @@ void PASTEMAC(chabr,chcr,opname,arch,suf) \ auxinfo_t auxinfo_r = *auxinfo; \ bli_auxinfo_set_params( params_r, &auxinfo_r ); \ \ - if ( !PASTEMAC(chabr,eq0)( *alpha_i ) || \ - !PASTEMAC(chcr,eq0)( *beta_i ) || \ + if ( !bli_teq0s( chabr, *alpha_i ) || \ + !bli_teq0s( chcr, *beta_i ) || \ !bli_is_preferentially_stored( rs_c, cs_c, row_pref ) || \ !PASTEMAC(chab,chc,same) ) \ { \ @@ -128,8 +128,9 @@ void PASTEMAC(chabr,chcr,opname,arch,suf) \ cntx \ ); \ \ - PASTEMAC(chab,chab,chc,chc,axpbys_mxn) \ + bli_taxpbys_mxn \ ( \ + chab,chab,chc,chc,chc, \ m, n, \ alpha, \ ct, rs_ct, cs_ct, \ diff --git a/ref_kernels/ind/bli_gemm_crr_ref.c b/ref_kernels/ind/bli_gemm_crr_ref.c index c729e95d7f..243e9e988e 100644 --- a/ref_kernels/ind/bli_gemm_crr_ref.c +++ b/ref_kernels/ind/bli_gemm_crr_ref.c @@ -99,35 +99,16 @@ void 
PASTEMAC(chabr,chcr,opname,arch,suf) \ cntx \ ); \ \ - ctype_abr ar, ai; \ - PASTEMAC(chab,gets)( *alpha, ar, ai ); \ -\ - if ( PASTEMAC(chc,eq0)( *beta ) ) \ - { \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - { \ - ctype_abr axr, axi; \ - ctype_ab ax; \ - PASTEMAC(chabr,scal2s)( ar, *(ct + ii*rs_ct + jj*cs_ct), axr ); \ - PASTEMAC(chabr,scal2s)( ai, *(ct + ii*rs_ct + jj*cs_ct), axi ); \ - PASTEMAC(chab,sets)( axr, axi, ax ); \ - PASTEMAC(chab,chc,copys)( ax, *(c + ii*rs_c + jj*cs_c) ); \ - } \ - } \ - else \ - { \ - for ( dim_t jj = 0; jj < n; ++jj ) \ - for ( dim_t ii = 0; ii < m; ++ii ) \ - { \ - ctype_abr axr, axi; \ - ctype_ab ax; \ - PASTEMAC(chabr,scal2s)( ar, *(ct + ii*rs_ct + jj*cs_ct), axr ); \ - PASTEMAC(chabr,scal2s)( ai, *(ct + ii*rs_ct + jj*cs_ct), axi ); \ - PASTEMAC(chab,sets)( axr, axi, ax ); \ - PASTEMAC(chab,chc,chc,xpbys)( ax, *beta, *(c + ii*rs_c + jj*cs_c) ); \ - } \ - } \ + bli_taxpbys_mxn \ + ( \ + chab,chabr,chc,chc,chc, \ + m, \ + n, \ + alpha, \ + ct, rs_ct, cs_ct, \ + beta, \ + c, rs_c, cs_c \ + ); \ } INSERT_GENTFUNC2RO( gemm_crr, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) diff --git a/ref_kernels/ind/bli_gemm_rcc_ref.c b/ref_kernels/ind/bli_gemm_rcc_ref.c index 67859cd4a1..97424bfde2 100644 --- a/ref_kernels/ind/bli_gemm_rcc_ref.c +++ b/ref_kernels/ind/bli_gemm_rcc_ref.c @@ -99,8 +99,9 @@ void PASTEMAC(chab,chc,opname,arch,suf) \ cntx \ ); \ \ - PASTEMAC(chab,chc,chc,xpbys_mxn) \ + bli_txpbys_mxn \ ( \ + chab,chc,chc,chc, \ m, n, \ ct, rs_ct, cs_ct, \ beta, \ diff --git a/ref_kernels/ind/bli_gemmtrsm1m_ref.c b/ref_kernels/ind/bli_gemmtrsm1m_ref.c index 54f9900c33..10a167570d 100644 --- a/ref_kernels/ind/bli_gemmtrsm1m_ref.c +++ b/ref_kernels/ind/bli_gemmtrsm1m_ref.c @@ -93,7 +93,7 @@ static void PASTEMAC(chr,opname,arch,suf) \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a10t * b01; */ \ - PASTEMAC(ch,set0ris)( rho11_r, \ + bli_tset0ris( ch, rho11_r, \ rho11_i ); \ for ( dim_t l = 0; l < n_behind; ++l ) \ { \ 
@@ -102,36 +102,36 @@ static void PASTEMAC(chr,opname,arch,suf) \ ctype_r* restrict beta01_r = b01_ri + (l )*rs_b2 + 0*cs_b; \ ctype_r* restrict beta01_i = b01_ri + (l )*rs_b2 + 1*cs_b; \ \ - PASTEMAC(ch,axpyris)( *alpha10_r, \ - *alpha10_i, \ - *beta01_r, \ - *beta01_i, \ - rho11_r, \ - rho11_i ); \ + bli_taxpyris( ch,ch,ch,ch, *alpha10_r, \ + *alpha10_i, \ + *beta01_r, \ + *beta01_i, \ + rho11_r, \ + rho11_i ); \ } \ - PASTEMAC(ch,subris)( rho11_r, \ - rho11_i, \ - beta11c_r, \ - beta11c_i ); \ + bli_tsubris( ch,ch,ch, rho11_r, \ + rho11_i, \ + beta11c_r, \ + beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: When preinversion is enabled, the INVERSE of alpha11 (1.0/alpha11) is stored during packing instead alpha11 so we can multiply rather than divide. When preinversion is disabled, alpha11 is stored and division happens below explicitly. */ \ - PASTEMAC(ch,diagop)( *alpha11_r, \ - *alpha11_i, \ - beta11c_r, \ - beta11c_i ); \ + PASTEMAC(t,diagop)( ch,ch,ch, *alpha11_r, \ + *alpha11_i, \ + beta11c_r, \ + beta11c_i ); \ \ /* Output final result to matrix c. */ \ - PASTEMAC(ch,sets)( beta11c_r, beta11c_i, *gamma11 ); \ + bli_tsets( ch,ch, beta11c_r, beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. 
*/ \ for ( dim_t d = 0; d < cs_b; ++d ) \ { \ - PASTEMAC(ch,copyris)( beta11c_r, beta11c_i, *(beta11_ri_r + d), *(beta11_ri_i + d) ); \ - PASTEMAC(ch,copyris)( -beta11c_i, beta11c_r, *(beta11_ir_r + d), *(beta11_ir_i + d) ); \ + bli_tcopyris( ch,ch, beta11c_r, beta11c_i, *(beta11_ri_r + d), *(beta11_ri_i + d) ); \ + bli_tcopyris( ch,ch, -beta11c_i, beta11c_r, *(beta11_ir_r + d), *(beta11_ir_i + d) ); \ } \ } \ } \ @@ -176,8 +176,8 @@ static void PASTEMAC(chr,opname,arch,suf) \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a10t * b01; */ \ - PASTEMAC(ch,set0ris)( rho11_r, \ - rho11_i ); \ + bli_tset0ris( ch, rho11_r, \ + rho11_i ); \ for ( dim_t l = 0; l < n_behind; ++l ) \ { \ ctype* restrict alpha10_ri = a10t_ri + (l )*cs_a; \ @@ -186,38 +186,38 @@ static void PASTEMAC(chr,opname,arch,suf) \ ctype_r* restrict beta01_r = b01_r + (l )*rs_b2; \ ctype_r* restrict beta01_i = b01_i + (l )*rs_b2; \ \ - PASTEMAC(ch,axpyris)( *alpha10_r, \ - *alpha10_i, \ - *beta01_r, \ - *beta01_i, \ - rho11_r, \ - rho11_i ); \ + bli_taxpyris( ch,ch,ch,ch, *alpha10_r, \ + *alpha10_i, \ + *beta01_r, \ + *beta01_i, \ + rho11_r, \ + rho11_i ); \ } \ - PASTEMAC(ch,subris)( rho11_r, \ - rho11_i, \ - beta11c_r, \ - beta11c_i ); \ + bli_tsubris( ch,ch,ch, rho11_r, \ + rho11_i, \ + beta11c_r, \ + beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: When preinversion is enabled, the INVERSE of alpha11 (1.0/alpha11) is stored during packing instead alpha11 so we can multiply rather than divide. When preinversion is disabled, alpha11 is stored and division happens below explicitly. */ \ - PASTEMAC(ch,diagop)( *alpha11_r, \ - *alpha11_i, \ - beta11c_r, \ - beta11c_i ); \ + PASTEMAC(t,diagop)( ch,ch,ch, *alpha11_r, \ + *alpha11_i, \ + beta11c_r, \ + beta11c_i ); \ \ /* Output final result to matrix c. */ \ - PASTEMAC(ch,sets)( beta11c_r, \ - beta11c_i, *gamma11 ); \ + bli_tsets( ch,ch, beta11c_r, \ + beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. 
*/ \ for ( dim_t d = 0; d < cs_b; ++d ) \ - PASTEMAC(ch,copyris)( beta11c_r, \ - beta11c_i, \ - *(beta11_r + d), \ - *(beta11_i + d) ); \ + bli_tcopyris( ch,ch, beta11c_r, \ + beta11c_i, \ + *(beta11_r + d), \ + *(beta11_i + d) ); \ } \ } \ } \ @@ -288,7 +288,7 @@ static void PASTEMAC(chr,opname,arch,suf) \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a10t * b01; */ \ - PASTEMAC(ch,set0ris)( rho11_r, \ + bli_tset0ris( ch, rho11_r, \ rho11_i ); \ for ( dim_t l = 0; l < n_behind; ++l ) \ { \ @@ -297,36 +297,36 @@ static void PASTEMAC(chr,opname,arch,suf) \ ctype_r* restrict beta21_r = b21_ri + (l )*rs_b2 + 0*cs_b; \ ctype_r* restrict beta21_i = b21_ri + (l )*rs_b2 + 1*cs_b; \ \ - PASTEMAC(ch,axpyris)( *alpha12_r, \ - *alpha12_i, \ - *beta21_r, \ - *beta21_i, \ - rho11_r, \ - rho11_i ); \ + bli_taxpyris( ch,ch,ch,ch, *alpha12_r, \ + *alpha12_i, \ + *beta21_r, \ + *beta21_i, \ + rho11_r, \ + rho11_i ); \ } \ - PASTEMAC(ch,subris)( rho11_r, \ - rho11_i, \ - beta11c_r, \ - beta11c_i ); \ + bli_tsubris( ch,ch,ch, rho11_r, \ + rho11_i, \ + beta11c_r, \ + beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: When preinversion is enabled, the INVERSE of alpha11 (1.0/alpha11) is stored during packing instead alpha11 so we can multiply rather than divide. When preinversion is disabled, alpha11 is stored and division happens below explicitly. */ \ - PASTEMAC(ch,diagop)( *alpha11_r, \ - *alpha11_i, \ - beta11c_r, \ - beta11c_i ); \ + PASTEMAC(t,diagop)( ch,ch,ch, *alpha11_r, \ + *alpha11_i, \ + beta11c_r, \ + beta11c_i ); \ \ /* Output final result to matrix c. */ \ - PASTEMAC(ch,sets)( beta11c_r, beta11c_i, *gamma11 ); \ + bli_tsets( ch,ch, beta11c_r, beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. 
*/ \ for ( dim_t d = 0; d < cs_b; ++d ) \ { \ - PASTEMAC(ch,copyris)( beta11c_r, beta11c_i, *(beta11_ri_r + d), *(beta11_ri_i + d) ); \ - PASTEMAC(ch,copyris)( -beta11c_i, beta11c_r, *(beta11_ir_r + d), *(beta11_ir_i + d) ); \ + bli_tcopyris( ch,ch, beta11c_r, beta11c_i, *(beta11_ri_r + d), *(beta11_ri_i + d) ); \ + bli_tcopyris( ch,ch, -beta11c_i, beta11c_r, *(beta11_ir_r + d), *(beta11_ir_i + d) ); \ } \ } \ } \ @@ -371,7 +371,7 @@ static void PASTEMAC(chr,opname,arch,suf) \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a10t * b01; */ \ - PASTEMAC(ch,set0ris)( rho11_r, \ + bli_tset0ris( ch, rho11_r, \ rho11_i ); \ for ( dim_t l = 0; l < n_behind; ++l ) \ { \ @@ -381,38 +381,38 @@ static void PASTEMAC(chr,opname,arch,suf) \ ctype_r* restrict beta21_r = b21_r + (l )*rs_b2; \ ctype_r* restrict beta21_i = b21_i + (l )*rs_b2; \ \ - PASTEMAC(ch,axpyris)( *alpha12_r, \ - *alpha12_i, \ - *beta21_r, \ - *beta21_i, \ - rho11_r, \ - rho11_i ); \ + bli_taxpyris( ch,ch,ch,ch, *alpha12_r, \ + *alpha12_i, \ + *beta21_r, \ + *beta21_i, \ + rho11_r, \ + rho11_i ); \ } \ - PASTEMAC(ch,subris)( rho11_r, \ - rho11_i, \ - beta11c_r, \ - beta11c_i ); \ + bli_tsubris( ch,ch,ch, rho11_r, \ + rho11_i, \ + beta11c_r, \ + beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: When preinversion is enabled, the INVERSE of alpha11 (1.0/alpha11) is stored during packing instead alpha11 so we can multiply rather than divide. When preinversion is disabled, alpha11 is stored and division happens below explicitly. */ \ - PASTEMAC(ch,diagop)( *alpha11_r, \ - *alpha11_i, \ - beta11c_r, \ - beta11c_i ); \ + PASTEMAC(t,diagop)( ch,ch,ch, *alpha11_r, \ + *alpha11_i, \ + beta11c_r, \ + beta11c_i ); \ \ /* Output final result to matrix c. */ \ - PASTEMAC(ch,sets)( beta11c_r, \ - beta11c_i, *gamma11 ); \ + bli_tsets( ch,ch, beta11c_r, \ + beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. 
*/ \ for ( dim_t d = 0; d < cs_b; ++d ) \ - PASTEMAC(ch,copyris)( beta11c_r, \ - beta11c_i, \ - *(beta11_r + d), \ - *(beta11_i + d) ); \ + bli_tcopyris( ch,ch, beta11c_r, \ + beta11c_i, \ + *(beta11_r + d), \ + *(beta11_i + d) ); \ } \ } \ } \ @@ -540,8 +540,9 @@ void PASTEMAC(chr,opname,arch,suf) \ ctype_r* restrict beta11_ir_r = b11_ir + i*rs_b2 + j*cs_b2 + 0*cs_b + d; \ ctype_r* restrict beta11_ir_i = b11_ir + i*rs_b2 + j*cs_b2 + 1*cs_b + d; \ \ - PASTEMAC(ch,chr,ch,xpbyris) \ + bli_txpbyris \ ( \ + ch,chr,ch,chr, \ *beta11t_r, \ *beta11t_i, \ alpha_r, \ @@ -550,8 +551,8 @@ void PASTEMAC(chr,opname,arch,suf) \ *beta11_ri_i \ ); \ \ - PASTEMAC(ch,copyris)( -*beta11_ri_i, *beta11_ri_r, \ - *beta11_ir_r, *beta11_ir_i ); \ + bli_tcopyris( ch,ch, -*beta11_ri_i, *beta11_ri_r, \ + *beta11_ir_r, *beta11_ir_i ); \ } \ } \ else /* if ( bli_is_1r_packed( schema_b ) ) */ \ @@ -575,8 +576,9 @@ void PASTEMAC(chr,opname,arch,suf) \ ctype_r* restrict beta11_r = b11_r + i*rs_b2 + j*cs_b2 + d; \ ctype_r* restrict beta11_i = b11_i + i*rs_b2 + j*cs_b2 + d; \ \ - PASTEMAC(ch,chr,ch,xpbyris) \ + bli_txpbyris \ ( \ + ch,chr,ch,chr, \ *beta11t_r, \ *beta11t_i, \ alpha_r, \ diff --git a/sandbox/gemmlike/bls_l3_packm_var1.c b/sandbox/gemmlike/bls_l3_packm_var1.c index ab656a31a0..b35a7bdc36 100644 --- a/sandbox/gemmlike/bls_l3_packm_var1.c +++ b/sandbox/gemmlike/bls_l3_packm_var1.c @@ -139,7 +139,7 @@ void bls_packm_var1 // ctype* cli = c_cast + (l )*ldc + (i )*incc; // ctype* pli = p_cast + (l )*ldp + (i )*1; // - // PASTEMAC(ch,axpyjs)( kappa_cast, *cli, *pli ); + // bli_taxpyjs( ch,ch,ch,ch, kappa_cast, *cli, *pli ); // } // } // } @@ -152,7 +152,7 @@ void bls_packm_var1 // ctype* cli = c_cast + (l )*ldc + (i )*incc; // ctype* pli = p_cast + (l )*ldp + (i )*1; // - // PASTEMAC(ch,axpys)( kappa_cast, *cli, *pli ); + // bli_taxpys( ch,ch,ch,ch, kappa_cast, *cli, *pli ); // } // } // } diff --git a/test/level0/Makefile b/test/level0/Makefile new file mode 100644 index 
0000000000..c07dce37c4 --- /dev/null +++ b/test/level0/Makefile @@ -0,0 +1,160 @@ +#!/bin/bash +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + +# +# Makefile +# +# Field G. Van Zee +# +# Makefile for standalone BLIS test drivers. 
+# + +# +# --- Makefile PHONY target definitions ---------------------------------------- +# + +.PHONY: all \ + test-ranges \ + clean cleanx + + + +# +# --- Determine makefile fragment location ------------------------------------- +# + +# Comments: +# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. +# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in +# the second case because CONFIG_NAME is not yet set. +ifneq ($(strip $(BLIS_INSTALL_PATH)),) +LIB_PATH := $(BLIS_INSTALL_PATH)/lib +INC_PATH := $(BLIS_INSTALL_PATH)/include/blis +SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis +else +DIST_PATH := ../.. +LIB_PATH = ../../lib/$(CONFIG_NAME) +INC_PATH = ../../include/$(CONFIG_NAME) +SHARE_PATH := ../.. +endif + + + +# +# --- Include common makefile definitions -------------------------------------- +# + +# Include the common makefile fragment. +-include $(SHARE_PATH)/common.mk + + + +# +# --- General build definitions ------------------------------------------------ +# + +TEST_SRC_PATH := . +TEST_OBJ_PATH := . + +# Gather all local object files. +TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \ + $(TEST_OBJ_PATH)/%.o, \ + $(wildcard $(TEST_SRC_PATH)/*.c))) + +# Override the value of CINCFLAGS so that the value of CFLAGS returned by +# get-user-cflags-for() is not cluttered up with include paths needed only +# while building BLIS. +CINCFLAGS := -I$(INC_PATH) + +# Use the CFLAGS for the configuration family. +#CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) +CFLAGS := $(strip -mavx2 -mfma -mfpmath=sse -march=haswell \ + -funsafe-math-optimizations \ + $(call get-user-cflags-for,$(CONFIG_NAME))) +# -funsafe-math-optimizations -ffp-contract=fast + +# Add installed and local header paths to CFLAGS +CFLAGS += -I$(TEST_SRC_PATH) + +HEADERS := $(wildcard $(TEST_SRC_PATH)/*.h) + +# Locate the libblis library to which we will link. 
+#LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) + + + +# +# --- Targets/rules ------------------------------------------------------------ +# + +all: test asm + +asm: test_l0.s + +test: test_l0.x + + + +# -- Object file rules -- + +$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c + $(CC) $(CFLAGS) -c $< -o $@ + +# -- Source file rules -- + +test_%.o: test_%.c Makefile $(HEADERS) + $(CC) $(CFLAGS) -c $< -o $@ + +test_%.s: test_%.c Makefile $(HEADERS) + $(CC) -S $(CFLAGS) -c $< -o $@ + + +# -- Executable file rules -- + +# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS +# on the link command line in case BLIS was configured with the BLAS +# compatibility layer. This prevents BLIS from inadvertently getting called +# for the BLAS routines we are trying to test with. + +test_l0.x: test_l0.o $(LIBBLIS_LINK) + $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + + +# -- Clean rules -- + +clean: cleanx + +cleanx: + - $(RM_F) *.o *.x test_l0.s + diff --git a/test/level0/bli_unit_testing.h b/test/level0/bli_unit_testing.h new file mode 100644 index 0000000000..58cd34dc3e --- /dev/null +++ b/test/level0/bli_unit_testing.h @@ -0,0 +1,876 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2021, Southern Methodist University + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_UNIT_TESTING_H +#define BLIS_UNIT_TESTING_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "blis.h" + +using unit_test_t = void(*)(); + +struct variable_printer_base +{ + virtual ~variable_printer_base() {} + + virtual void print() const = 0; +}; + +struct unit_test_failure : std::exception {}; + +struct unit_test_registrar +{ + std::vector tests; + std::vector vars; + + static const std::string& red() /* ANSI red escape if stdout is a terminal, else ""; "\033" is the portable spelling of ESC */ + { + #ifdef BLIS_OS_WINDOWS + static std::string s = _isatty(_fileno(stdout)) ? "\033[0;31m" : ""; + #else + static std::string s = isatty(fileno(stdout)) ? "\033[0;31m" : ""; + #endif + return s; + } + + static const std::string& green() /* ANSI green escape if stdout is a terminal, else "" */ + { + #ifdef BLIS_OS_WINDOWS + static std::string s = _isatty(_fileno(stdout)) ? "\033[0;32m" : ""; + #else + static std::string s = isatty(fileno(stdout)) ?
"\033[0;32m" : ""; + #endif + return s; + } + + static const std::string& normal() /* ANSI attribute-reset escape if stdout is a terminal, else "" */ + { + #ifdef BLIS_OS_WINDOWS + static std::string s = _isatty(_fileno(stdout)) ? "\033[0m" : ""; + #else + static std::string s = isatty(fileno(stdout)) ? "\033[0m" : ""; + #endif + return s; + } + + size_t register_test(unit_test_t test) /* appends the test and returns its index */ + { + tests.push_back(test); + return tests.size()-1; + } + + void run_tests() /* runs every registered test, counts those that throw unit_test_failure, prints a summary */ + { + auto failed = 0; + auto total = 0; + + for (auto& test : tests) + { + try + { + test(); + } + catch (const unit_test_failure&) /* thrown by fail(); caught by reference to avoid copying the exception */ + { + failed++; + } + + total++; + } + + printf("\n"); + printf("Total tests: %d\n", total); + printf("%sPassed: %d (%.1f%%)%s\n", green().c_str(), total-failed, 100.0*(total-failed)/total, normal().c_str()); + if (failed) + printf("%sFailed: %d (%.1f%%)%s\n\n", red().c_str(), failed, 100.0*failed/total, normal().c_str()); + } + + void push_var(const variable_printer_base* var) + { + vars.push_back(var); + } + + void pop_var(const variable_printer_base* var) + { + assert(vars.back() == var); + vars.pop_back(); + } + + void fail(const char* cond) /* prints every pushed INFO variable and the failed condition, then throws unit_test_failure */ + { + printf("%sFAILURE%s\n\n", red().c_str(), normal().c_str()); + + for (auto& var : vars) + var->print(); + + printf("\nAssertion failed: %s\n\n", cond); + + signal(SIGTRAP, [](int) {}); + raise(SIGTRAP); + + throw unit_test_failure(); + } +}; + +static unit_test_registrar& get_unit_test_registrar() +{ + static unit_test_registrar registrar; + return registrar; +} + +static size_t register_unit_test(unit_test_t test) +{ + return get_unit_test_registrar().register_test(test); +} + +template +struct variable_printer : variable_printer_base +{ + const char* message{}; + T var{}; + + variable_printer() + { + get_unit_test_registrar().push_var(this); + } + + virtual ~variable_printer() + { + get_unit_test_registrar().pop_var(this); + } + + variable_printer& operator<<(const char* message) + { + this->message = message; + return *this; + } + + variable_printer& operator<<(const T& var) + { + this->var = var; +
return *this; + } + + virtual void print() const final override + { + std::cout << message << var << std::endl; + } +}; + +template <> +struct variable_printer : variable_printer_base +{ + const char* message; + + variable_printer() + { + get_unit_test_registrar().push_var(this); + } + + virtual ~variable_printer() + { + get_unit_test_registrar().pop_var(this); + } + + variable_printer& operator<<(const char* message) + { + this->message = message; + return *this; + } + + virtual void print() const final override + { + std::cout << message << std::endl; + } +}; + +template +struct variable_printer_helper +{ + using type = variable_printer; + + template + variable_printer_helper operator<<(U) const; + + variable_printer_helper operator<<(const char*) const; +}; + +#define VARIABLE_PRINTER(...) typename decltype(variable_printer_helper{} << __VA_ARGS__)::type + +#define VAR_NAME_(line) variable_printer_##line +#define VAR_NAME(line) VAR_NAME_(line) + +#define INFO_(id, ...) \ +VARIABLE_PRINTER(__VA_ARGS__) VAR_NAME(id); \ +VAR_NAME(id) << __VA_ARGS__; + +#if ENABLE_INFO +#define INFO(...) INFO_(__COUNTER__, __VA_ARGS__) +#else +#define INFO(...) +#endif + +#define TEST_NAME_(line) unit_test_##line +#define TEST_NAME(line) TEST_NAME_(line) + +#define TEST_ID_(line) unit_test_id_##line +#define TEST_ID(line) TEST_ID_(line) + +#define TEST_CASE_(id,name) \ +extern "C" void TEST_NAME(id)(); \ +static auto TEST_ID(id) = register_unit_test(TEST_NAME(id)); \ +void TEST_NAME(id)() +#define TEST_CASE(name) TEST_CASE_(__COUNTER__,name) + +#define REQUIRE(cond) \ +do { \ + if ( !BLIS_LIKELY( cond ) ) \ + { \ + get_unit_test_registrar().fail( #cond ); \ + } \ +} while (0) + +#define FAIL(...) 
\ +do { \ + INFO(__VA_ARGS__); \ + REQUIRE(false); \ +} while (0) + +class Approx +{ + protected: + double target_; + double margin_ = 0; + + public: + Approx(double target) : target_(target) {} + + Approx& margin(double value) + { + margin_ = value; + return *this; + } + + bool operator==(double other) const + { + return std::abs(other - target_) <= margin_; + } + + friend bool operator==(double lhs, const Approx& rhs) + { + return rhs == lhs; + } +}; + +#define UNIT_TEST1( ch1, opname ) \ +TEST_CASE(STRINGIFY_INT(ch1##opname)) \ +{ \ + INFO("Type character 1: " << #ch1); \ + printf("Testing: %s...", STRINGIFY_INT(ch1##opname)); + +#define UNIT_TEST2( ch1, ch2, opname ) \ +TEST_CASE(STRINGIFY_INT(ch1##ch2##opname)) \ +{ \ + INFO("Type character 1: " << #ch1); \ + INFO("Type character 2: " << #ch2); \ + printf("Testing: %s...", STRINGIFY_INT(ch1##ch2##opname)); + +#define UNIT_TEST3( ch1, ch2, ch3, opname ) \ +TEST_CASE(STRINGIFY_INT(ch1##ch2##ch3##opname)) \ +{ \ + INFO("Type character 1: " << #ch1); \ + INFO("Type character 2: " << #ch2); \ + INFO("Type character 3: " << #ch3); \ + printf("Testing: %s...", STRINGIFY_INT(ch1##ch2##ch3##opname)); + +#define UNIT_TEST4( ch1, ch2, ch3, ch4, opname ) \ +TEST_CASE(STRINGIFY_INT(ch1##ch2##ch3##ch4##opname)) \ +{ \ + INFO("Type character 1: " << #ch1); \ + INFO("Type character 2: " << #ch2); \ + INFO("Type character 3: " << #ch3); \ + INFO("Type character 4: " << #ch4); \ + printf("Testing: %s...", STRINGIFY_INT(ch1##ch2##ch3##ch4##opname)); + +#define UNIT_TEST5( ch1, ch2, ch3, ch4, ch5, opname ) \ +TEST_CASE(STRINGIFY_INT(ch1##ch2##ch3##ch4##ch5##opname)) \ +{ \ + INFO("Type character 1: " << #ch1); \ + INFO("Type character 2: " << #ch2); \ + INFO("Type character 3: " << #ch3); \ + INFO("Type character 4: " << #ch4); \ + INFO("Type character 5: " << #ch5); \ + printf("Testing: %s...", STRINGIFY_INT(ch1##ch2##ch3##ch4##ch5##opname)); + +#define UNIT_TEST_BODY( ... 
) \ + __VA_ARGS__; \ + printf("%sPASS%s\n", unit_test_registrar::green().c_str(), unit_test_registrar::normal().c_str()); \ +} + +#define UNIT_TEST_SELECTOR_( ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7, ... ) ARG7 + +#define UNIT_TEST_SELECTOR( ... ) \ +UNIT_TEST_SELECTOR_( __VA_ARGS__, \ + UNIT_TEST5, \ + UNIT_TEST4, \ + UNIT_TEST3, \ + UNIT_TEST2, \ + UNIT_TEST1) + +#define UNIT_TEST( ... ) UNIT_TEST_SELECTOR(__VA_ARGS__)(__VA_ARGS__) UNIT_TEST_BODY + +enum +{ + BLIS_TEST_ZERO = 0x01, + BLIS_TEST_NEGATIVE = 0x02, + BLIS_TEST_INFINITY = 0x04, + BLIS_TEST_NAN = 0x08, + BLIS_TEST_DEFAULT = ~BLIS_TEST_INFINITY +}; + +template +struct is_complex : std::false_type {}; + +template <> +struct is_complex : std::true_type {}; + +template <> +struct is_complex : std::true_type {}; + +template +struct is_real : std::integral_constant::value> {}; + +template struct make_complex; + +template <> struct make_complex { using type = scomplex; }; +template <> struct make_complex { using type = dcomplex; }; +template <> struct make_complex { using type = scomplex; }; +template <> struct make_complex { using type = dcomplex; }; + +template +using make_complex_t = typename make_complex::type; + +template struct make_real; + +template <> struct make_real { using type = float; }; +template <> struct make_real { using type = double; }; +template <> struct make_real { using type = float; }; +template <> struct make_real { using type = double; }; + +template +using make_real_t = typename make_real::type; + +template +struct make_complex_if : std::conditional,make_real_t> {}; + +template +using make_complex_if_t = typename make_complex_if::type; + +template +struct real_imag_part +{ + real_imag_part& operator=(T) { return *this; } + + operator T() const { return T(); } +}; + +template +std::enable_if_t::type>::value,T&> real(T& x) { return x; } + +template +std::enable_if_t::value,real_imag_part> imag(T) { return {}; } + +inline float& real(scomplex& x) { return x.real; } + +inline float& 
imag(scomplex& x) { return x.imag; } + +inline double& real(dcomplex& x) { return x.real; } + +inline double& imag(dcomplex& x) { return x.imag; } + +inline const float& real(const scomplex& x) { return x.real; } + +inline const float& imag(const scomplex& x) { return x.imag; } + +inline const double& real(const dcomplex& x) { return x.real; } + +inline const double& imag(const dcomplex& x) { return x.imag; } + +template +std::enable_if_t::value,T> norm(T x) { return x*x; } + +inline float norm(const scomplex& x) { return x.real*x.real + x.imag*x.imag; } + +inline double norm(const dcomplex& x) { return x.real*x.real + x.imag*x.imag; } + +template +std::enable_if_t::value,T> absolute(T x) { return std::abs(x); } + +inline float absolute(const scomplex& x) { return std::hypot(x.real, x.imag); } + +inline double absolute(const dcomplex& x) { return std::hypot(x.real, x.imag); } + +template +std::enable_if_t::value,T> square_root(T x) { return std::sqrt(x); } + +template +struct convert_impl; + +template +struct convert_impl::value && is_real::value>> +{ + void operator()(T x, U& y) const { y = x; } +}; + +template +struct convert_impl::value && is_complex::value>> +{ + void operator()(T x, U& y) const { y.real = x; y.imag = 0; } +}; + +template +struct convert_impl::value && is_real::value>> +{ + void operator()(T x, U& y) const { y = x.real; } +}; + +template +struct convert_impl::value && is_complex::value>> +{ + void operator()(T x, U& y) const { y.real = x.real; y.imag = x.imag; } +}; + +template +U convert(T x) +{ + U y; + convert_impl{}(x,y); + return y; +} + +template +auto convert_prec(T x) -> make_complex_if_t::value> +{ + return convert::value>>(x); +} + +#define COMPLEX_MATH_OPS(rtype, ctype) \ +\ +inline bool operator==(rtype x, ctype y) \ +{ \ + return x == y.real && y.imag == 0; \ +} \ +\ +inline bool operator==(ctype x, rtype y) \ +{ \ + return y == x.real && x.imag == 0; \ +} \ +\ +inline bool operator==(ctype x, ctype y) \ +{ \ + return x.real == 
y.real && \ + x.imag == y.imag; \ + } \ + \ +inline ctype operator-(ctype x) \ +{ \ + return {-x.real, -x.imag}; \ +} \ +\ +inline ctype operator+(rtype x, ctype y) \ +{ \ + return {x+y.real, y.imag}; \ +} \ +\ +inline ctype operator+(ctype x, rtype y) \ +{ \ + return {y+x.real, x.imag}; \ +} \ +\ +inline ctype operator+(ctype x, ctype y) \ +{ \ + return {x.real+y.real, x.imag+y.imag}; \ +} \ +\ +inline ctype operator-(rtype x, ctype y) \ +{ \ + return {x-y.real, -y.imag}; \ +} \ +\ +inline ctype operator-(ctype x, rtype y) \ +{ \ + return {x.real-y, x.imag}; \ +} \ +\ +inline ctype operator-(ctype x, ctype y) \ +{ \ + return {x.real-y.real, x.imag-y.imag}; \ +} \ +\ +inline ctype operator*(rtype x, ctype y) \ +{ \ + return {x*y.real, x*y.imag}; \ +} \ +\ +inline ctype operator*(ctype x, rtype y) \ +{ \ + return {y*x.real, y*x.imag}; \ +} \ +\ +inline ctype operator*(ctype x, ctype y) \ +{ \ + return {x.real*y.real - x.imag*y.imag, \ + x.real*y.imag + x.imag*y.real}; \ +} \ +\ +inline ctype operator/(rtype x, ctype y) \ +{ \ + auto scale = std::max(std::abs(y.real), std::abs(y.imag)); \ + auto n = std::ilogb(scale); \ + auto yrs = std::scalbn(y.real, -n); \ + auto yis = std::scalbn(y.imag, -n); \ + auto denom = y.real*yrs + y.imag*yis; \ + return {x*yrs/denom, -x*yis/denom}; \ +} \ +\ +inline ctype operator/(ctype x, rtype y) \ +{ \ + return {x.real/y, x.imag/y}; \ +} \ +\ +inline ctype operator/(ctype x, ctype y) \ +{ \ + auto scale = std::max(std::abs(y.real), std::abs(y.imag)); \ + auto n = std::ilogb(scale); \ + auto yrs = std::scalbn(y.real, -n); \ + auto yis = std::scalbn(y.imag, -n); \ + auto denom = y.real*yrs + y.imag*yis; \ + return {(x.real*yrs + x.imag*yis)/denom, \ + (x.imag*yrs - x.real*yis)/denom}; \ +} + +COMPLEX_MATH_OPS(float, scomplex); +COMPLEX_MATH_OPS(double, dcomplex); + +template +std::enable_if_t::value,T> conj(T x) { return x; } + +template +std::enable_if_t::value,T> conj(const T& x) { return {x.real, -x.imag}; } + +template 
+std::enable_if_t::value,T> swapri(const T& x) { return {x.imag, x.real}; } + +inline bool bli_isnan( float x ) { return bli_sisnan( x ); } + +inline bool bli_isnan( double x ) { return bli_disnan( x ); } + +inline bool bli_isinf( float x ) { return bli_sisinf( x ); } + +inline bool bli_isinf( double x ) { return bli_disinf( x ); } + +template +std::enable_if_t::value> check(T x, T y) +{ + auto tol = 2*std::numeric_limits>::epsilon(); + INFO("x: " << x); + INFO("y: " << y); + INFO("|x-y|: " << std::abs(x-y)); + INFO("eps: " << tol); + if ( bli_isnan( x ) || bli_isnan( y ) ) + REQUIRE( bli_isnan( x ) == bli_isnan( y ) ); + else if ( bli_isinf( x ) || bli_isinf( y ) ) + REQUIRE( x == y ); + else + REQUIRE( x == Approx(y).margin(tol) ); +} + +template +std::enable_if_t::value> check(const T& x, const T& y) +{ + INFO("Real part:"); + check( x.real, y.real ); + INFO("Imag part:"); + check( x.imag, y.imag ); +} + +template +std::enable_if_t::value,std::vector> test_values(int mask = BLIS_TEST_DEFAULT) +{ + std::vector vals{1.439}; + + if (mask & BLIS_TEST_NEGATIVE) + vals.push_back(-2.563); + + if (mask & BLIS_TEST_ZERO) + vals.push_back(0); + + if (mask & BLIS_TEST_INFINITY) + { + vals.push_back(INFINITY); + if (mask & BLIS_TEST_NEGATIVE) + vals.push_back(-INFINITY); + } + + if (mask & BLIS_TEST_NAN) + vals.push_back(NAN); + + return vals; +} + +template +std::enable_if_t::value,std::vector> test_values(int mask = BLIS_TEST_DEFAULT) +{ + auto real_vals = test_values>(mask); + std::vector vals; + for (auto& r : real_vals) + for (auto& i : real_vals) + vals.push_back({r, i}); + return vals; +} + +template +std::enable_if_t::value,std::ostream&> operator<<(std::ostream& os, const T& val) +{ + return os << '(' << val.real << ", " << val.imag << ')'; +} + +template +std::array,M> tile(const T& val = T()) +{ + std::array,M> ret; + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + ret[i][j] = val; + return ret; +} + +template +std::array,M> conj(const 
std::array,M>& x) +{ + std::array,M> ret; + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + ret[i][j] = conj(x[i][j]); + return ret; +} + +template +std::array,N>,M> real(const std::array,M>& x) +{ + std::array,N>,M> ret; + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + ret[i][j] = real(x[i][j]); + return ret; +} + +template +std::array,N>,M> imag(const std::array,M>& x) +{ + std::array,N>,M> ret; + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + ret[i][j] = imag(x[i][j]); + return ret; +} + +struct dense_cond +{ + bool operator()(dim_t, dim_t) const { return true; } +}; + +constexpr dense_cond dense; + +struct is_below +{ + doff_t diagoff; + + is_below(doff_t diagoff) : diagoff(diagoff) {} + + bool operator()(dim_t i, dim_t j) const { return j-i <= diagoff; } +}; + +struct is_above +{ + doff_t diagoff; + + is_above(doff_t diagoff) : diagoff(diagoff) {} + + bool operator()(dim_t i, dim_t j) const { return j-i >= diagoff; } +}; + +template +void check(const std::array,M>& x, + const std::array,M>& y) +{ + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + { + INFO("i = " << i); + INFO("j = " << j); + check(x[i][j], y[i][j]); + } +} + +template +void axpbys_mxn(const A& a, const std::array,M>& x, + const B& b, std::array,M>& y, const std::function& cond) +{ + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + if (Transpose == BLIS_NO_TRANSPOSE ? cond(i, j) : cond(j, i)) + { + if (real(b) == 0 && imag(b) == 0) + y[i][j] = convert(convert_prec(a) * + convert_prec(x[i][j])); + else + y[i][j] = convert(convert_prec(a) * + convert_prec(x[i][j]) + + convert_prec(b) * + convert_prec(y[i][j])); + } +} + +namespace std +{ + +template +std::ostream& operator<<(std::ostream& os, const std::array,M>& x) +{ + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + os << '[' << i << "][" << j << "]: " << x[i][j] << std::endl; + return os; +} + +} // namespace std + +#define BLIS_FOR_ALL_TYPES0(macro, ...) 
macro(__VA_ARGS__); + +#define BLIS_FOR_TYPES_1R(...) \ +BLIS_FOR_ALL_TYPES0(__VA_ARGS__, float, s) \ +BLIS_FOR_ALL_TYPES0(__VA_ARGS__, double, d) + +#define BLIS_FOR_TYPES_1C(...) \ +BLIS_FOR_ALL_TYPES0(__VA_ARGS__, scomplex, c) \ +BLIS_FOR_ALL_TYPES0(__VA_ARGS__, dcomplex, z) + +#define BLIS_FOR_TYPES_1RC(...) \ +BLIS_FOR_TYPES_1R(__VA_ARGS__) \ +BLIS_FOR_TYPES_1C(__VA_ARGS__) + +#define BLIS_FOR_ALL_TYPES1(type, ...) PASTECH(BLIS_FOR_TYPES_1, type)(__VA_ARGS__) + +#define BLIS_FOR_TYPES_2R(...) \ +BLIS_FOR_ALL_TYPES1(__VA_ARGS__, float, s) \ +BLIS_FOR_ALL_TYPES1(__VA_ARGS__, double, d) + +#define BLIS_FOR_TYPES_2C(...) \ +BLIS_FOR_ALL_TYPES1(__VA_ARGS__, scomplex, c) \ +BLIS_FOR_ALL_TYPES1(__VA_ARGS__, dcomplex, z) + +#define BLIS_FOR_TYPES_2RC(...) \ +BLIS_FOR_TYPES_2R(__VA_ARGS__) \ +BLIS_FOR_TYPES_2C(__VA_ARGS__) + +#define BLIS_FOR_ALL_TYPES2(type, ...) PASTECH(BLIS_FOR_TYPES_2, type)(__VA_ARGS__) + +#define BLIS_FOR_TYPES_3R(...) \ +BLIS_FOR_ALL_TYPES2(__VA_ARGS__, float, s) \ +BLIS_FOR_ALL_TYPES2(__VA_ARGS__, double, d) + +#define BLIS_FOR_TYPES_3C(...) \ +BLIS_FOR_ALL_TYPES2(__VA_ARGS__, scomplex, c) \ +BLIS_FOR_ALL_TYPES2(__VA_ARGS__, dcomplex, z) + +#define BLIS_FOR_TYPES_3RC(...) \ +BLIS_FOR_TYPES_3R(__VA_ARGS__) \ +BLIS_FOR_TYPES_3C(__VA_ARGS__) + +#define BLIS_FOR_ALL_TYPES3(type, ...) PASTECH(BLIS_FOR_TYPES_3, type)(__VA_ARGS__) + +#define BLIS_FOR_TYPES_4R(...) \ +BLIS_FOR_ALL_TYPES3(__VA_ARGS__, float, s) \ +BLIS_FOR_ALL_TYPES3(__VA_ARGS__, double, d) + +#define BLIS_FOR_TYPES_4C(...) \ +BLIS_FOR_ALL_TYPES3(__VA_ARGS__, scomplex, c) \ +BLIS_FOR_ALL_TYPES3(__VA_ARGS__, dcomplex, z) + +#define BLIS_FOR_TYPES_4RC(...) \ +BLIS_FOR_TYPES_4R(__VA_ARGS__) \ +BLIS_FOR_TYPES_4C(__VA_ARGS__) + +#define BLIS_FOR_ALL_TYPES4(type, ...) PASTECH(BLIS_FOR_TYPES_4, type)(__VA_ARGS__) + +#define BLIS_FOR_TYPES_5R(...) \ +BLIS_FOR_ALL_TYPES4(__VA_ARGS__, float, s) \ +BLIS_FOR_ALL_TYPES4(__VA_ARGS__, double, d) + +#define BLIS_FOR_TYPES_5C(...) 
\ +BLIS_FOR_ALL_TYPES4(__VA_ARGS__, scomplex, c) \ +BLIS_FOR_ALL_TYPES4(__VA_ARGS__, dcomplex, z) + +#define BLIS_FOR_TYPES_5RC(...) \ +BLIS_FOR_TYPES_5R(__VA_ARGS__) \ +BLIS_FOR_TYPES_5C(__VA_ARGS__) + +#define BLIS_FOR_ALL_TYPES5(type, ...) PASTECH(BLIS_FOR_TYPES_5, type)(__VA_ARGS__) + +#define INSERT_GENTFUNC_MIX1(t1, opname) \ +BLIS_FOR_ALL_TYPES1(t1, GENTFUNC, opname) + +#define INSERT_GENTFUNC_MIX2(t1, t2, opname) \ +BLIS_FOR_ALL_TYPES2(t1, t2, GENTFUNC, opname) + +#define INSERT_GENTFUNC_MIX3(t1, t2, t3, opname) \ +BLIS_FOR_ALL_TYPES3(t1, t2, t3, GENTFUNC, opname) + +#define INSERT_GENTFUNC_MIX4(t1, t2, t3, t4, opname) \ +BLIS_FOR_ALL_TYPES4(t1, t2, t3, t4, GENTFUNC, opname) + +#define INSERT_GENTFUNC_MIX5(t1, t2, t3, t4, t5, opname) \ +BLIS_FOR_ALL_TYPES5(t1, t2, t3, t4, t5, GENTFUNC, opname) + +#endif + diff --git a/test/level0/test_l0.cxx b/test/level0/test_l0.cxx new file mode 100644 index 0000000000..a9ad384eb1 --- /dev/null +++ b/test/level0/test_l0.cxx @@ -0,0 +1,3167 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include "bli_unit_testing.h" + +#include +#include +#include +#include + +int main() +{ + get_unit_test_registrar().run_tests(); +} + +/****************************************************************************** + * + * absq2s + * + *****************************************************************************/ + +// tabsq2s unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( norm( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tabsq2s( chx,chy,chc, x, y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, absq2s ) + +// tabsq2ris unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( norm( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tabsq2ris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO(
"y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, absq2ris ) + +/****************************************************************************** + * + * abval2s + * + *****************************************************************************/ + +// tabval2s unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( absolute( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tabval2s( chx,chy,chc, x, y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, abval2s ) + +// tabval2ris unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( absolute( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tabval2ris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, abval2ris ) + +/****************************************************************************** + * + * add3s + * + *****************************************************************************/ + +#undef GENTFUNC /* tadd3s unit test: z := x + y; INSERT_GENTFUNC_MIX4 passes opname first, then one (ctype, ch) pair per type slot */ +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3s( chx,chy,chz,chc, x, y, z ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, add3s )
+ +#undef GENTFUNC /* tadd3js unit test: z := conj(x) + y; GENTFUNC receives opname first, then one (ctype, ch) pair per type slot */ +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3js( chx,chy,chz,chc, x, y, z ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, add3js ) + +// tadd3ris unit test: z := x + y, operands passed as separate real/imag parts (four type slots: x, y, z, c) +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3ris( chx,chy,chz,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ), \ + real( z ), imag( z ) ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, add3ris ) + +// tadd3jris unit test: z := conj(x) + y, operands passed as separate real/imag parts (four type slots: x, y, z, c) +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3jris( chx,chy,chz,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ), \ + real( z ), imag( z ) ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, add3jris ) + +/****************************************************************************** + * + * adds + * + *****************************************************************************/ + +// tadds
unit test: y := x + y +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tadds( chx,chy,chc, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, adds ) + +// taddjs unit test: y := conj(x) + y +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_taddjs( chx,chy,chc, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, addjs ) + +// taddris unit test: y := x + y, operands passed as separate real/imag parts +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_taddris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, addris ) + +// taddjris unit test: y := conj(x) + y, operands passed as separate real/imag parts +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ +
bli_taddjris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, addjris ) + +// tadds_mxn unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, 1.0, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_tadds_mxn( chx,chy,chc, M, N, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, 1.0, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_tadds_mxn( chx,chy,chc, N, M, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, adds_mxn ) + +/****************************************************************************** + * + * axpbys + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + 
convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpbys( cha,chx,chb,chy,chc, a, x, b, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, C, axpbys ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + conj( convert_prec( x ) ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpbyjs( cha,chx,chb,chy,chc, a, x, b, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, C, axpbyjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpbyris( cha,chx,chb,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( b ), imag( b ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, C, axpbyris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, 
chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + conj( convert_prec( x ) ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpbyjris( cha,chx,chb,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( b ), imag( b ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, C, axpbyjris ) + +/****************************************************************************** + * + * axpys + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpys( cha,chx,chy,chc, a, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, axpys ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "y (init): " 
<< y ); \ +\ + bli_taxpyjs( cha,chx,chy,chc, a, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, axpyjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpyris( cha,chx,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, axpyris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpyjris( cha,chx,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, axpyjris ) + +/****************************************************************************** + * + * conjs + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = conj( y ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tconjs( chy, y 
); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( C, conjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = conj( y ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tconjris( chy, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( C, conjris ) + +/****************************************************************************** + * + * copycjs + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( bli_is_conj( conjx ) ? conj( x ) : x ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopycjs( chx,chy, conjx, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copycjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( bli_is_conj( conjx ) ? 
conj( x ) : x ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "x: " << x); \ +\ + ctypey y; \ + bli_tcopycjris( chx,chy, conjx, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copycjris ) + +/****************************************************************************** + * + * copynzs + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = real( x ); \ + if ( is_complex::value ) \ + imag( y0 ) = imag( x ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tcopynzs( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copynzs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = real( x ); \ + if ( is_complex::value ) \ + imag( y0 ) = -imag( x ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tcopynzjs( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copynzjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = real( x ); \ + if ( is_complex::value ) \ + imag( y0 ) = imag( x ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tcopynzris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); 
\ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copynzris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = real( x ); \ + if ( is_complex::value ) \ + imag( y0 ) = -imag( x ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tcopynzjris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copynzjris ) + +/****************************************************************************** + * + * copys + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopys( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copys ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( conj( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopyjs( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copyjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopyris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ 
+ INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copyris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( conj( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopyjris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copyjris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, 0.0, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ +\ + bli_tcopys_mxn( chx,chy, M, N, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, 0.0, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ +\ + bli_tcopys_mxn( chx,chy, N, M, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, copys_mxn ) + +/****************************************************************************** + * + * dots + * + *****************************************************************************/ + +// No tests, dot(x, y, a) == axpy(y, x, a) + +/****************************************************************************** + * + * eqs + * + 
*****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto expected = convert_prec( x ) == \ + convert_prec( y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + auto found = bli_teqs( chx,chy,chc, x, y ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, RC, eqs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto expected = convert_prec( x ) == \ + convert_prec( y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + auto found = bli_teqris( chx,chy,chc, \ + real(x), imag(x), \ + real(y), imag(y) ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, RC, eqris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( 1.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teq1ris( chx, real( x ), imag( x ) ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eq1ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( 0.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teq0ris( chx, real( x ), imag( x ) ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + 
+INSERT_GENTFUNC_MIX1( RC, eq0ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( -1.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teqm1ris( chx, real( x ), imag( x ) ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eqm1ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( 1.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teq1s( chx, x ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eq1s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( 0.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teq0s( chx, x ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eq0s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( -1.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teqm1s( chx, x ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eqm1s ) + +/****************************************************************************** + * + * fprints + * + *****************************************************************************/ + +// No tests + +/****************************************************************************** + * + * gets + * + 
*****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ +\ + using ctypeyr = make_real_t; \ + using ctypeyc = make_complex_t; \ +\ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypeyr yr, yi; \ + bli_tgets( chx,chy, x, yr, yi ); \ +\ + INFO( "yr (C++): " << real( y0 ) ); \ + INFO( "yi (C++): " << imag( y0 ) ); \ + INFO( "yr (BLIS): " << yr ); \ + INFO( "yi (BLIS): " << yi ); \ +\ + check( yr, real( y0 ) ); \ + check( yi, imag( y0 ) ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, gets ) + +/****************************************************************************** + * + * inverts + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(chx,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( 1.0 ) / \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinverts( chx,chc, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, inverts ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(chx,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( 1.0 ) / \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvertris( chx,chc, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, invertris ) + +/****************************************************************************** + * + * invscals + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, 
ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) / \ + convert_prec( a ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvscals( cha,chx,chc, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, invscals ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) / \ + convert_prec( conj( a ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvscaljs( cha,chx,chc, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, invscaljs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) / \ + convert_prec( a ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvscalris( cha,chx,chc, \ + real( a ), imag( a ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, invscalris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) / \ + convert_prec( conj( a ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvscaljris( cha,chx,chc, \ + real( a ), imag( a 
), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, invscaljris ) + +/****************************************************************************** + * + * neg2s + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( -x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tneg2s( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, neg2s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( -x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tneg2ris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, C, neg2ris ) + +/****************************************************************************** + * + * randnp2s + * + *****************************************************************************/ + +// No tests + +/****************************************************************************** + * + * rands + * + *****************************************************************************/ + +// No tests + +/****************************************************************************** + * + * scal2s + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + 
auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2s( cha,chx,chy,chc, a, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( conj( x ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2js( cha,chx,chy,chc, a, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2js ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2ris( cha,chx,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( conj( x ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2jris( cha,chx,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y 
(C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2jris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, bli_is_conj( conjx ) ? conj( xmn ) : xmn, 0.0, ymn0, dense ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2s_mxn( cha,chx,chy,chc, conjx, M, N, &a, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO("column-major"); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, bli_is_conj( conjx ) ? 
conj( xmn ) : xmn, 0.0, ymn0, dense ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2s_mxn( cha,chx,chy,chc, conjx, N, M, &a, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2s_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, bli_is_conj( conjx ) \ + ? conj( xmn ) \ + : xmn, \ + 0.0, ymn0, dense ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2ris_mxn( cha,chx,chy,chc, conjx, \ + M, N, &a, \ + &xmn[0][0], N, 1, \ + &ymn[0][0], 2*N, 2, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, bli_is_conj( conjx ) \ + ? 
conj( xmn ) \ + : xmn, \ + 0.0, ymn0, dense ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2ris_mxn( cha,chx,chy,chc, \ + conjx, N, M, &a, \ + &xmn[0][0], 1, N, \ + &ymn[0][0], 2, 2*N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, C, scal2ris_mxn_1 ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + using ctypeyr = make_real_t; \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto yrmn = tile(); \ + auto yimn = tile(); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = tile(); \ + axpbys_mxn( a, bli_is_conj( conjx ) \ + ? conj( xmn ) \ + : xmn, \ + 0.0, ymn0, dense ); \ + auto yrmn0 = real( ymn0 ); \ + auto yimn0 = imag( ymn0 ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2ris_mxn( cha,chx,chy,chc, \ + conjx, M, N, &a, \ + &xmn[0][0], N, 1, \ + &yrmn[0][0], N, 1, \ + &yimn[0][0] - &yrmn[0][0] ); \ +\ + INFO( "yr (C++):\n" << yrmn0 ); \ + INFO( "yi (C++):\n" << yimn0 ); \ + INFO( "yr (BLIS):\n" << yrmn ); \ + INFO( "yi (BLIS):\n" << yimn ); \ +\ + check( yrmn, yrmn0 ); \ + check( yimn, yimn0 ); \ + } \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto yrmn = tile(); \ + auto yimn = tile(); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = tile(); \ + axpbys_mxn( a, bli_is_conj( conjx ) \ + ? 
conj( xmn ) \ + : xmn, \ + 0.0, ymn0, dense ); \ + auto yrmn0 = real( ymn0 ); \ + auto yimn0 = imag( ymn0 ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2ris_mxn( cha,chx,chy,chc, \ + conjx, N, M, &a, \ + &xmn[0][0], 1, N, \ + &yrmn[0][0], 1, N, \ + &yimn[0][0] - &yrmn[0][0] ); \ +\ + INFO( "yr (C++):\n" << yrmn0 ); \ + INFO( "yi (C++):\n" << yimn0 ); \ + INFO( "yr (BLIS):\n" << yrmn ); \ + INFO( "yi (BLIS):\n" << yimn ); \ +\ + check( yrmn, yrmn0 ); \ + check( yimn, yimn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, C, scal2ris_mxn_k ) + +/****************************************************************************** + * + * scals + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscals( cha,chx,chc, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, scals ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( conj( a ) ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscaljs( cha,chx,chc, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, scaljs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : 
test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscalris( cha,chx,chc, \ + real( a ), imag( a ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, scalris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( conj( a ) ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscaljris( cha,chx,chc, \ + real( a ), imag( a ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, scaljris ) + +// scalris_mxn_uplo unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto diagoff : { -1, 0, 1 } ) \ + { \ + auto xmn = tile( x ); \ +\ + INFO( "row-major" ); \ +\ + std::function func = is_below( diagoff ); \ + if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ +\ + auto xmn0 = xmn; \ + axpbys_mxn( a, xmn, 0.0, xmn0, func ); \ +\ + INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ + INFO( "diagoff: " << diagoff ); \ + INFO( "a: " << a ); \ + INFO( "x (init):\n" << xmn ); \ +\ + bli_tscalris_mxn_uplo( cha,chx,chc, uplo, diagoff, M, N, \ + &real( a ), &real( a )+1, \ + &real( xmn[0][0] ), &real( xmn[0][0] )+1, \ + &real( xmn[1][0] ) - &real( xmn[0][0] ), \ + &real( xmn[0][1] ) - &real( xmn[0][0] ) ); \ +\ + 
INFO( "x (C++):\n" << xmn0 ); \ + INFO( "x (BLIS):\n" << xmn ); \ +\ + check( xmn, xmn0 ); \ + } \ +\ + for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto diagoff : { -1, 0, 1 } ) \ + { \ + auto xmn = tile( x ); \ +\ + INFO( "column-major" ); \ +\ + std::function func = is_below( diagoff ); \ + if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ +\ + auto xmn0 = xmn; \ + axpbys_mxn( a, xmn, 0.0, xmn0, func ); \ +\ + INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ + INFO( "diagoff: " << diagoff ); \ + INFO( "a: " << a ); \ + INFO( "x (init):\n" << xmn ); \ +\ + bli_tscalris_mxn_uplo( cha,chx,chc, uplo, diagoff, N, M, \ + &real( a ), &real( a )+1, \ + &real( xmn[0][0] ), &real( xmn[0][0] )+1, \ + &real( xmn[0][1] ) - &real( xmn[0][0] ), \ + &real( xmn[1][0] ) - &real( xmn[0][0] ) ); \ +\ + INFO( "x (C++):\n" << xmn0 ); \ + INFO( "x (BLIS):\n" << xmn ); \ +\ + check( xmn, xmn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, scalris_mxn_uplo ) + +/****************************************************************************** + * + * sets + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tsets( chx,chy, real( x ), imag( x ), y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, sets ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = convert_prec( real( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + bli_tsetrs( chx,chy, real( x ), y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y 
(BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, setrs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + imag( y0 ) = convert_prec( imag( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + bli_tsetis( chx,chy, imag( x ), y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, setis ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tsetris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, setris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( 0.0 ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tset0s( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set0s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( 1.0 ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tset1s( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set1s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = convert_prec( 0.0 ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tsetr0s( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + 
INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, setr0s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + imag( y0 ) = convert_prec( 0.0 ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tseti0s( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, seti0s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( 0.0 ); \ +\ + bli_tset0ris( chy, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set0ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ + \ + for ( auto y : test_values() ) \ + { \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = tile( convert( 0.0 ) ); \ +\ + INFO( "y (init):\n" << ymn); \ +\ + bli_tset0s_mxn( chy, M, N, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ + \ + for ( auto y : test_values() ) \ + { \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = tile( convert( 0.0 ) ); \ +\ + INFO( "y (init):\n" << ymn ); \ +\ + bli_tset0s_mxn( chy, N, M, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set0s_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + /* TODO */ \ +) + +//INSERT_GENTFUNC_MIX1( C, set0bbs_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypey, chy ) \ +UNIT_TEST(cha,chy,opname) \ +( \ + /* TODO */ \ +) + 
+//INSERT_GENTFUNC_MIX2( RC, C, set1ms_mxn_diag ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypey, chy ) \ +UNIT_TEST(cha,chy,opname) \ +( \ + /* TODO */ \ +) + +//INSERT_GENTFUNC_MIX2( RC, C, set1ms_mxn_uplo ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypey, chy ) \ +UNIT_TEST(cha,chy,opname) \ +( \ + /* TODO */ \ +) + +//INSERT_GENTFUNC_MIX2( RC, C, set1ms_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + /* TODO */ \ +) + +//INSERT_GENTFUNC_MIX1( C, seti01ms_mxn_diag ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chy,chc,opname) \ +( \ + /* TODO */ \ +) + +//INSERT_GENTFUNC_MIX3( RC, RC, C, setrihs_mxn_diag ) + +/****************************************************************************** + * + * sqrt2s + * + *****************************************************************************/ + +// tsqrt2s unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( square_root( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tsqrt2s( chx,chy,chc, x, y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( R, R, R, sqrt2s ) + +// tsqrt2ris unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( square_root( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tsqrt2ris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( R, R, R, sqrt2ris ) + 
+/****************************************************************************** + * + * subs + * + *****************************************************************************/ + +// tsubs unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( y ) - \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tsubs( chx,chy,chc, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, subs ) + +// tsubjs unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( y ) - \ + conj( convert_prec( x ) ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tsubjs( chx,chy,chc, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, subjs ) + +// tsubris unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( y ) - \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tsubris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, subris ) + +// tsubjris unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : 
test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( y ) - \ + conj( convert_prec( x ) ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tsubjris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, C, subjris ) + +/****************************************************************************** + * + * swaps + * + *****************************************************************************/ + +// tswaps unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto x0 = convert( y ); \ + auto y0 = convert( x ); \ +\ + INFO( "x (init): " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tswaps( chx,chy, x, y ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "x (BLIS): " << x ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( x, x0 ); \ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, swaps ) + +// tswapris unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto x0 = convert( y ); \ + auto y0 = convert( x ); \ +\ + INFO( "x (init): " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tswapris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "x (BLIS): " << x ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( x, x0 ); \ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, swapris ) + +/****************************************************************************** + * + * xpbys + * + 
*****************************************************************************/ + +// txpbys unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_txpbys( chx,chb,chy,chc, x, b, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbys ) + +// txpbyjs unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_txpbyjs( chx,chb,chy,chc, x, b, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbyjs ) + +// txpbyris unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_txpbyris( chx,chb,chy,chc, \ + real( x ), imag( x ), \ + real( b ), imag( b ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << 
y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbyris ) + +// txpbyjris +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_txpbyjris( chx,chb,chy,chc, \ + real( x ), imag( x ), \ + real( b ), imag( b ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbyjris ) + +// xpbys_mxn unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, b, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "b: " << b ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_txpbys_mxn( chx,chb,chy,chc, M, N, &xmn[0][0], N, 1, &b, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, b, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "b: " << b ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_txpbys_mxn( chx,chb,chy,chc, N, M, &xmn[0][0], 1, N, &b, &ymn[0][0], 
1, N ); \ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbys_mxn ) + +// xpbys_mxn_uplo unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + for ( auto diagoff : { -1, 0, 1 } ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + std::function func = is_below( diagoff ); \ + if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, b, ymn0, func ); \ +\ + INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ + INFO( "diagoff: " << diagoff ); \ + INFO( "x:\n" << xmn ); \ + INFO( "b: " << b ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_txpbys_mxn_uplo( chx,chb,chy,chc, diagoff, uplo, M, N, &xmn[0][0], N, 1, &b, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + for ( auto diagoff : { -1, 0, 1 } ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + std::function func = is_below( diagoff ); \ + if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, b, ymn0, func ); \ +\ + INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ + INFO( "diagoff: " << diagoff ); \ + INFO( "x:\n" << xmn ); \ + INFO( "b: " << b ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_txpbys_mxn_uplo( chx,chb,chy,chc, diagoff, uplo, N, M, &xmn[0][0], 1, N, &b, &ymn[0][0], 1, N ); \ +\ + INFO( "y 
(C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbys_mxn_uplo ) + +/****************************************************************************** + * + * copy1es + * + *****************************************************************************/ + +// tcopy1es unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto yri0 = convert( x ); \ + auto yir0 = convert( swapri( conj( x ) ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey yri, yir; \ + bli_tcopy1es( chx,chy, x, yri, yir ); \ +\ + INFO( "yri (C++): " << yri0 ); \ + INFO( "yir (C++): " << yir0 ); \ + INFO( "yri (BLIS): " << yri ); \ + INFO( "yir (BLIS): " << yir ); \ +\ + check( yri, yri0 ); \ + check( yir, yir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, C, copy1es ) + +// tcopyj1es unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto yri0 = convert( conj( x ) ); \ + auto yir0 = convert( swapri( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey yri, yir; \ + bli_tcopyj1es( chx,chy, x, yri, yir ); \ +\ + INFO( "yri (C++): " << yri0 ); \ + INFO( "yir (C++): " << yir0 ); \ + INFO( "yri (BLIS): " << yri ); \ + INFO( "yir (BLIS): " << yir ); \ +\ + check( yri, yri0 ); \ + check( yir, yir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, C, copyj1es ) + +/****************************************************************************** + * + * invert1es + * + *****************************************************************************/ + +// tinvert1es unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(chx,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto xri = x; \ + auto xir = swapri( conj( x ) ); \ +\ + auto xri0 = convert( convert_prec( 1.0 ) / \ + convert_prec( x ) 
); \ + auto xir0 = swapri( conj( xri0 ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "xri (orig): " << xri ); \ + INFO( "xir (orig): " << xir ); \ +\ + bli_tinvert1es( chx,chc, xri, xir ); \ +\ + INFO( "xri (C++): " << xri0 ); \ + INFO( "xir (C++): " << xir0 ); \ + INFO( "xri (BLIS): " << xri ); \ + INFO( "xir (BLIS): " << xir ); \ +\ + check( xri, xri0 ); \ + check( xir, xir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, C, invert1es ) + +/****************************************************************************** + * + * scal21es + * + *****************************************************************************/ + +// tscal21es unit test: expects yri = a * x and yir = swapri( conj( yri ) ) +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto yri0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ + auto yir0 = swapri( conj( yri0 ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey yri, yir; \ + bli_tscal21es( cha,chx,chy,chc, a, x, yri, yir ); \ +\ + INFO( "yri (C++): " << yri0 ); \ + INFO( "yir (C++): " << yir0 ); \ + INFO( "yri (BLIS): " << yri ); \ + INFO( "yir (BLIS): " << yir ); \ +\ + check( yri, yri0 ); \ + check( yir, yir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, C, scal21es ) + +// tscal2j1es unit test: expects yri = a * conj( x ) and yir = swapri( conj( yri ) ) +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto yri0 = convert( convert_prec( a ) * \ + convert_prec( conj( x ) ) ); \ + auto yir0 = swapri( conj( yri0 ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey yri, yir; \ + bli_tscal2j1es( cha,chx,chy,chc, a, x, yri, yir ); \ +\ + INFO( "yri (C++): " << yri0 ); \ + INFO( "yir (C++): " << yir0 ); \ + INFO( "yri (BLIS): " << yri ); \ + INFO( "yir (BLIS): " << yir ); \ +\ + 
check( yri, yri0 ); \ + check( yir, yir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, C, scal2j1es ) + +/****************************************************************************** + * + * scal1es + * + *****************************************************************************/ + +// tscal1es unit test: expects xri = a * x and xir = swapri( conj( xri ) ) +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xri = x; \ + auto xir = swapri( conj( x ) ); \ +\ + auto xri0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ + auto xir0 = swapri( conj( xri0 ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "xri (orig): " << xri ); \ + INFO( "xir (orig): " << xir ); \ +\ + bli_tscal1es( cha,chx,chc, a, xri, xir ); \ +\ + INFO( "xri (C++): " << xri0 ); \ + INFO( "xir (C++): " << xir0 ); \ + INFO( "xri (BLIS): " << xri ); \ + INFO( "xir (BLIS): " << xir ); \ +\ + check( xri, xri0 ); \ + check( xir, xir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, C, C, scal1es ) + +/****************************************************************************** + * + * copy1rs + * + *****************************************************************************/ + +// tcopy1rs unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopy1rs( chx,chy, x, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, C, copy1rs ) + +/****************************************************************************** + * + * invert1rs + * + *****************************************************************************/ + +// tinvert1rs unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypec, 
chc ) \ +UNIT_TEST(chx,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto x0 = convert( convert_prec( 1.0 ) / \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + bli_tinvert1rs( chx,chc, real( x ), imag( x ) ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, C, invert1rs ) + +/****************************************************************************** + * + * scal21rs + * + *****************************************************************************/ + +// tscal21rs unit test: expects y = a * x +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xr = real( x ); \ + auto xi = imag( x ); \ +\ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal21rs( cha,chx,chy,chc, a, x, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + /* The result is written through real( y )/imag( y ); compare y as a whole. */ \ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, C, scal21rs ) + +// tscal2j1rs unit test: expects y = a * conj( x ) +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xr = real( x ); \ + auto xi = imag( x ); \ +\ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( conj( x ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2j1rs( cha,chx,chy,chc, a, x, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + /* The result is written through real( y )/imag( y ); compare y as a whole. */ \ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, C, scal2j1rs ) + +/******************************************************************************
+ * + * scal1rs + * + *****************************************************************************/ + +// tscal1rs unit test +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto x0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x (orig): " << x ); \ +\ + bli_tscal1rs( cha,chx,chc, a, real( x ), imag( x ) ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "xr(BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, C, C, scal1rs ) diff --git a/testsuite/src/test_amaxv.c b/testsuite/src/test_amaxv.c index 6d25888515..3bed62a8a8 100644 --- a/testsuite/src/test_amaxv.c +++ b/testsuite/src/test_amaxv.c @@ -413,17 +413,17 @@ void PASTEMAC(ch,varname) \ the behavior of netlib BLAS's i?amax() routines. */ \ if ( bli_zero_dim1( n ) ) \ { \ - PASTEMAC(i,copys)( *zero_i, *index ); \ + bli_tcopys( i,i, *zero_i, *index ); \ return; \ } \ \ /* Initialize the index of the maximum absolute value to zero. */ \ - PASTEMAC(i,copys)( *zero_i, index_l ); \ + bli_tcopys( i,i, *zero_i, index_l ); \ \ /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ \ - PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \ + bli_tcopys( chr,chr, *minus_one, abs_chi1_max ); \ \ { \ for ( i = 0; i < n; ++i ) \ @@ -431,23 +431,23 @@ void PASTEMAC(ch,varname) \ ctype* chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. */ \ - PASTEMAC(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ + bli_tgets( ch,chr, *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. 
*/ \ - PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ - PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ + bli_tabval2s( chr,chr,chr, chi1_r, chi1_r ); \ + bli_tabval2s( chr,chr,chr, chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ - PASTEMAC(chr,set0s)( abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ - PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ + bli_tset0s( chr, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_r, abs_chi1 ); \ + bli_tadds( chr,chr,chr, chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ - if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ + if ( abs_chi1_max < abs_chi1 || PASTEMAC(chr,isnan)( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ index_l = i; \ @@ -456,7 +456,7 @@ void PASTEMAC(ch,varname) \ } \ \ /* Store the final index to the output variable. */ \ - PASTEMAC(i,copys)( index_l, *index ); \ + bli_tcopys( i,i, index_l, *index ); \ } INSERT_GENTFUNCR_BASIC( amaxv_test ) diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index aed0cd8178..35bac9d495 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -1345,10 +1345,10 @@ char* libblis_test_get_string_for_result( double resid, char* r_val; // Before checking against the thresholds, make sure the residual is - // neither NaN nor Inf. (Note that bli_isnan() and bli_isinf() are + // neither NaN nor Inf. (Note that bli_disnan() and bli_disinf() are // both simply wrappers to the isnan() and isinf() macros defined // defined in math.h.) 
- if ( bli_isnan( resid ) || bli_isinf( resid ) ) + if ( bli_disnan( resid ) || bli_disinf( resid ) ) { r_val = libblis_test_fail_string; } diff --git a/testsuite/src/test_randm.c b/testsuite/src/test_randm.c index 8742695c6d..fafd0761d5 100644 --- a/testsuite/src/test_randm.c +++ b/testsuite/src/test_randm.c @@ -325,7 +325,7 @@ void PASTEMAC(ch,varname)( \ ctype_r sum; \ dim_t i, j; \ \ - PASTEMAC(chr,set0s)( sum ); \ + bli_tset0s( chr, sum ); \ \ for ( j = 0; j < n; j++ ) \ { \ @@ -333,12 +333,12 @@ void PASTEMAC(ch,varname)( \ { \ ctype* chi1 = x_cast + (i )*rs_x + (j )*cs_x; \ \ - PASTEMAC(ch,chr,abval2s)( *chi1, abs_chi1 ); \ - PASTEMAC(chr,chr,adds)( abs_chi1, sum ); \ + bli_tabval2s( ch,chr,chr, *chi1, abs_chi1 ); \ + bli_tadds( chr,chr,chr, abs_chi1, sum ); \ } \ } \ \ - PASTEMAC(chr,chr,copys)( sum, *sum_x_cast ); \ + bli_tcopys( chr,chr, sum, *sum_x_cast ); \ } INSERT_GENTFUNCR_BASIC( absumm ) diff --git a/testsuite/src/test_setm.c b/testsuite/src/test_setm.c index 80cebd64e0..966b697356 100644 --- a/testsuite/src/test_setm.c +++ b/testsuite/src/test_setm.c @@ -181,7 +181,7 @@ void libblis_test_setm_experiment // Randomize x. libblis_test_mobj_randomize( params, FALSE, &x ); - // Repeat the experiment n_repeats times and record results. + // Repeat the experiment n_repeats times and record results. 
for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); @@ -295,7 +295,7 @@ void libblis_test_setm_check { chi1 = buf_x_cast + (i )*rs_x + (j )*cs_x; - if ( !bli_ceq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } + if ( !bli_teqs( c,c,c, *chi1, *beta_cast ) ) { *resid = 1.0; return; } } } } @@ -311,7 +311,7 @@ void libblis_test_setm_check { chi1 = buf_x_cast + (i )*rs_x + (j )*cs_x; - if ( !bli_zeq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } + if ( !bli_teqs( z,z,z, *chi1, *beta_cast ) ) { *resid = 1.0; return; } } } } diff --git a/testsuite/src/test_setv.c b/testsuite/src/test_setv.c index 10f0348c75..1983093d7e 100644 --- a/testsuite/src/test_setv.c +++ b/testsuite/src/test_setv.c @@ -179,7 +179,7 @@ void libblis_test_setv_experiment // Randomize x. libblis_test_vobj_randomize( params, FALSE, &x ); - // Repeat the experiment n_repeats times and record results. + // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); @@ -255,7 +255,7 @@ void libblis_test_setv_check for ( i = 0; i < m_x; ++i ) { if ( !bli_seq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } - + chi1 += inc_x; } } @@ -267,7 +267,7 @@ void libblis_test_setv_check for ( i = 0; i < m_x; ++i ) { if ( !bli_deq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } - + chi1 += inc_x; } } @@ -278,8 +278,8 @@ void libblis_test_setv_check for ( i = 0; i < m_x; ++i ) { - if ( !bli_ceq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } - + if ( !bli_teqs( c,c,c, *chi1, *beta_cast ) ) { *resid = 1.0; return; } + chi1 += inc_x; } } @@ -290,8 +290,8 @@ void libblis_test_setv_check for ( i = 0; i < m_x; ++i ) { - if ( !bli_zeq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } - + if ( !bli_teqs( z,z,z, *chi1, *beta_cast ) ) { *resid = 1.0; return; } + chi1 += inc_x; } } From f35f2c2c0f28c71b4a54dc9b9a87e1c4ebf8e1ca Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sun, 3 Nov 2024 15:57:09 -0600 Subject: [PATCH 02/19] Fix problem with GEMMTRSM edge-case 
macros (affects haswell predominately). --- frame/include/bli_edge_case_macro_defs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/frame/include/bli_edge_case_macro_defs.h b/frame/include/bli_edge_case_macro_defs.h index bef0927472..5bac3081a5 100644 --- a/frame/include/bli_edge_case_macro_defs.h +++ b/frame/include/bli_edge_case_macro_defs.h @@ -205,8 +205,9 @@ output microtile. Used by trsm. */ \ if ( _use_ct ) \ { \ - PASTEMAC(ch,copys_mxn) \ + PASTEMAC(t,copys_mxn) \ ( \ + ch,ch, \ m, n, \ _ct, _rs_ct, _cs_ct, \ _c, _rs_c, _cs_c \ From 08c1d99273ba4bfdff1731475dd78c61addfbc84 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sun, 3 Nov 2024 16:08:57 -0600 Subject: [PATCH 03/19] Use temporaries in level-0 axpys and scal2s. --- frame/include/level0/bli_taxpys.h | 16 +++++++++++++--- frame/include/level0/bli_tscal2s.h | 16 +++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/frame/include/level0/bli_taxpys.h b/frame/include/level0/bli_taxpys.h index 9dec13ee17..b5b89c678c 100644 --- a/frame/include/level0/bli_taxpys.h +++ b/frame/include/level0/bli_taxpys.h @@ -37,8 +37,10 @@ // -- Implementation macro ----------------------------------------------------- -// (yr) += (ar) * (xr) - (ai) * (xi); -// (yi) += (ai) * (xr) + (ar) * (xi); +// (tr) += (ar) * (xr) - (ai) * (xi); +// (ti) += (ai) * (xr) + (ar) * (xi); +// (yr) += (tr); +// (yi) += (ti); #define bli_taxpyims( \ \ @@ -48,8 +50,9 @@ chc \ ) \ { \ - PASTEMAC(dy,assigns) \ + PASTEMAC(c,declinits) \ ( \ + py, \ PASTEMAC(chc,py,tcast)( \ PASTEMAC(chc,add)( \ PASTEMAC(py,chc,tcast)(yr), \ @@ -92,6 +95,13 @@ ) \ ) \ ), \ + tr, \ + ti \ + ); \ + PASTEMAC(dy,assigns) \ + ( \ + tr, \ + ti, \ yr, \ yi \ ); \ diff --git a/frame/include/level0/bli_tscal2s.h b/frame/include/level0/bli_tscal2s.h index 586ed66fb0..1f42cef9da 100644 --- a/frame/include/level0/bli_tscal2s.h +++ b/frame/include/level0/bli_tscal2s.h @@ -37,8 +37,10 @@ // -- Implementation macro 
----------------------------------------------------- -// (yr) := (ar) * (xr) - (ai) * (xi); -// (yi) := (ai) * (xr) + (ar) * (xi); +// (tr) := (ar) * (xr) - (ai) * (xi); +// (ti) := (ai) * (xr) + (ar) * (xi); +// (yr) := (tr); +// (yi) := (ti); #define bli_tscal2ims( \ \ @@ -48,8 +50,9 @@ chc \ ) \ { \ - PASTEMAC(dy,assigns) \ + PASTEMAC(c,declinits) \ ( \ + py, \ PASTEMAC(chc,py,tcast)( \ PASTEMAC(chc,sub)( \ PASTEMAC(da,dx,termrr)( \ @@ -86,6 +89,13 @@ ) \ ) \ ), \ + tr, \ + ti \ + ); \ + PASTEMAC(dy,assigns) \ + ( \ + tr, \ + ti, \ yr, \ yi \ ); \ From 4b6cad0888182e93475b9e33eaa9f6852b627d72 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sun, 3 Nov 2024 21:22:51 -0600 Subject: [PATCH 04/19] Fix some places where old level-0 macros were still used. --- .../kernels/1/bli_axpyv_template_noopt_var1.c | 12 ++-- .../kernels/1/bli_dotv_template_noopt_var1.c | 18 +++--- .../1f/bli_axpy2v_template_noopt_var1.c | 48 +++++++-------- .../1f/bli_axpyf_template_noopt_var1.c | 16 ++--- .../1f/bli_dotaxpyv_template_noopt_var1.c | 54 ++++++++--------- .../1f/bli_dotxaxpyf_template_noopt_var1.c | 60 +++++++++---------- .../1f/bli_dotxf_template_noopt_var1.c | 20 +++---- .../kernels/3/bli_trsm_l_template_noopt_mxn.c | 8 +-- .../kernels/3/bli_trsm_u_template_noopt_mxn.c | 8 +-- frame/base/bli_obj.c | 30 +++++----- kernels/zen/1f/bli_axpyf_zen_int_4.c | 20 +++---- kernels/zen/1f/bli_axpyf_zen_int_5.c | 48 +++++++-------- kernels/zen/3/bli_gemmt_small.c | 48 +++++++-------- vendor/testcpp/test_sdsdot.cc | 10 ++-- 14 files changed, 200 insertions(+), 200 deletions(-) diff --git a/config/template/kernels/1/bli_axpyv_template_noopt_var1.c b/config/template/kernels/1/bli_axpyv_template_noopt_var1.c index 511514aeab..cc3c078c12 100644 --- a/config/template/kernels/1/bli_axpyv_template_noopt_var1.c +++ b/config/template/kernels/1/bli_axpyv_template_noopt_var1.c @@ -179,7 +179,7 @@ void bli_zaxpyv_template_noopt // Compute front edge cases if x and y were unaligned. 
for ( i = 0; i < n_pre; ++i ) { - bli_zaxpys( *alpha, *xp, *yp ); + bli_taxpys( z,z,z,z, *alpha, *xp, *yp ); xp += 1; yp += 1; } @@ -188,7 +188,7 @@ void bli_zaxpyv_template_noopt // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zaxpys( *alpha, *xp, *yp ); + bli_taxpys( z,z,z,z, *alpha, *xp, *yp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -197,7 +197,7 @@ void bli_zaxpyv_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zaxpys( *alpha, *xp, *yp ); + bli_taxpys( z,z,z,z, *alpha, *xp, *yp ); xp += 1; yp += 1; } @@ -207,7 +207,7 @@ void bli_zaxpyv_template_noopt // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zaxpyjs( *alpha, *xp, *yp ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *yp ); xp += 1; yp += 1; } @@ -216,7 +216,7 @@ void bli_zaxpyv_template_noopt // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zaxpyjs( *alpha, *xp, *yp ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *yp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -225,7 +225,7 @@ void bli_zaxpyv_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zaxpyjs( *alpha, *xp, *yp ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *yp ); xp += 1; yp += 1; } diff --git a/config/template/kernels/1/bli_dotv_template_noopt_var1.c b/config/template/kernels/1/bli_dotv_template_noopt_var1.c index 60ebf3e146..c59c0d6209 100644 --- a/config/template/kernels/1/bli_dotv_template_noopt_var1.c +++ b/config/template/kernels/1/bli_dotv_template_noopt_var1.c @@ -127,7 +127,7 @@ void bli_zdotv_template_noopt // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { - bli_zset0s( *rho ); + bli_tset0s( z, *rho ); return; } @@ -185,7 +185,7 @@ void bli_zdotv_template_noopt // Initialize accumulator to zero. 
- bli_zset0s( dotxy ); + bli_tset0s( z, dotxy ); conjx_use = conjx; @@ -204,7 +204,7 @@ void bli_zdotv_template_noopt // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zdots( *xp, *yp, dotxy ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); xp += 1; yp += 1; } @@ -213,7 +213,7 @@ void bli_zdotv_template_noopt // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zdots( *xp, *yp, dotxy ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -222,7 +222,7 @@ void bli_zdotv_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zdots( *xp, *yp, dotxy ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); xp += 1; yp += 1; } @@ -232,7 +232,7 @@ void bli_zdotv_template_noopt // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); xp += 1; yp += 1; } @@ -241,7 +241,7 @@ void bli_zdotv_template_noopt // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -250,7 +250,7 @@ void bli_zdotv_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); xp += 1; yp += 1; } @@ -259,7 +259,7 @@ void bli_zdotv_template_noopt // If conjugation on y was requested, we induce it by conjugating // the contents of dotxy. 
if ( bli_is_conj( conjy ) ) - bli_zconjs( dotxy ); + bli_tconjs( z, dotxy ); bli_tcopys( z,z, dotxy, *rho ); } diff --git a/config/template/kernels/1f/bli_axpy2v_template_noopt_var1.c b/config/template/kernels/1f/bli_axpy2v_template_noopt_var1.c index 5a12bf761f..649511cf03 100644 --- a/config/template/kernels/1f/bli_axpy2v_template_noopt_var1.c +++ b/config/template/kernels/1f/bli_axpy2v_template_noopt_var1.c @@ -194,8 +194,8 @@ void bli_zaxpy2v_template_noopt // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zaxpys( *alpha1, *xp, *zp ); - bli_zaxpys( *alpha2, *yp, *zp ); + bli_taxpys( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpys( z,z,z,z, *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -207,8 +207,8 @@ void bli_zaxpy2v_template_noopt // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zaxpys( *alpha1, *xp, *zp ); - bli_zaxpys( *alpha2, *yp, *zp ); + bli_taxpys( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpys( z,z,z,z, *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -218,8 +218,8 @@ void bli_zaxpy2v_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zaxpys( *alpha1, *xp, *zp ); - bli_zaxpys( *alpha2, *yp, *zp ); + bli_taxpys( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpys( z,z,z,z, *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -229,8 +229,8 @@ void bli_zaxpy2v_template_noopt // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zaxpys( *alpha1, *xp, *zp ); - bli_zaxpyjs( *alpha2, *yp, *zp ); + bli_taxpys( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -242,8 +242,8 @@ void bli_zaxpy2v_template_noopt // to BLIS_SIMD_ALIGN_SIZE. 
for ( i = 0; i < n_iter; ++i ) { - bli_zaxpys( *alpha1, *xp, *zp ); - bli_zaxpyjs( *alpha2, *yp, *zp ); + bli_taxpys( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -253,8 +253,8 @@ void bli_zaxpy2v_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zaxpys( *alpha1, *xp, *zp ); - bli_zaxpyjs( *alpha2, *yp, *zp ); + bli_taxpys( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -264,8 +264,8 @@ void bli_zaxpy2v_template_noopt // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zaxpyjs( *alpha1, *xp, *zp ); - bli_zaxpys( *alpha2, *yp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpys( z,z,z,z, *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -277,8 +277,8 @@ void bli_zaxpy2v_template_noopt // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zaxpyjs( *alpha1, *xp, *zp ); - bli_zaxpys( *alpha2, *yp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpys( z,z,z,z, *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -288,8 +288,8 @@ void bli_zaxpy2v_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zaxpyjs( *alpha1, *xp, *zp ); - bli_zaxpys( *alpha2, *yp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpys( z,z,z,z, *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -299,8 +299,8 @@ void bli_zaxpy2v_template_noopt // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zaxpyjs( *alpha1, *xp, *zp ); - bli_zaxpyjs( *alpha2, *yp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -312,8 +312,8 @@ void bli_zaxpy2v_template_noopt // to BLIS_SIMD_ALIGN_SIZE. 
for ( i = 0; i < n_iter; ++i ) { - bli_zaxpyjs( *alpha1, *xp, *zp ); - bli_zaxpyjs( *alpha2, *yp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -323,8 +323,8 @@ void bli_zaxpy2v_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zaxpyjs( *alpha1, *xp, *zp ); - bli_zaxpyjs( *alpha2, *yp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha1, *xp, *zp ); + bli_taxpyjs( z,z,z,z, *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } diff --git a/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c b/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c index 6a40ed3554..834c2fc242 100644 --- a/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c +++ b/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c @@ -210,7 +210,7 @@ void bli_zaxpyf_template_noopt for ( j = 0; j < b_n; ++j ) { bli_tcopys( z,z, *xp[ j ], alpha_x[ j ] ); - bli_zscals( *alpha, alpha_x[ j ] ); + bli_tscals( z,z,z, *alpha, alpha_x[ j ] ); } } else // if ( bli_is_conj( conjx ) ) @@ -218,7 +218,7 @@ void bli_zaxpyf_template_noopt for ( j = 0; j < b_n; ++j ) { bli_tcopyjs( z,z, *xp[ j ], alpha_x[ j ] ); - bli_zscals( *alpha, alpha_x[ j ] ); + bli_tscals( z,z,z, *alpha, alpha_x[ j ] ); } } @@ -231,7 +231,7 @@ void bli_zaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp ); + bli_taxpys( z,z,z,z, alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += 1; } @@ -247,7 +247,7 @@ void bli_zaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp ); + bli_taxpys( z,z,z,z, alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += n_elem_per_iter; } @@ -259,7 +259,7 @@ void bli_zaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp ); + bli_taxpys( z,z,z,z, alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += 1; } @@ -273,7 +273,7 @@ void bli_zaxpyf_template_noopt { for ( j = 0; j < 
b_n; ++j ) { - bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp ); + bli_taxpyjs( z,z,z,z, alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += 1; } @@ -289,7 +289,7 @@ void bli_zaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp ); + bli_taxpyjs( z,z,z,z, alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += n_elem_per_iter; } @@ -301,7 +301,7 @@ void bli_zaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp ); + bli_taxpyjs( z,z,z,z, alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += 1; } diff --git a/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c b/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c index ca1076e3ec..ae806d50d9 100644 --- a/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c +++ b/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c @@ -138,7 +138,7 @@ void bli_zdotaxpyv_template_noopt // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { - bli_zset0s( *rho ); + bli_tset0s( z, *rho ); return; } @@ -202,7 +202,7 @@ void bli_zdotaxpyv_template_noopt // Initialize accumulator to zero. - bli_zset0s( dotxy ); + bli_tset0s( z, dotxy ); conjxt_use = conjxt; @@ -222,8 +222,8 @@ void bli_zdotaxpyv_template_noopt // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zdots( *xp, *yp, dotxy ); - bli_zaxpys( *alpha, *xp, *zp ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpys( z,z,z,z, *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -235,8 +235,8 @@ void bli_zdotaxpyv_template_noopt // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zdots( *xp, *yp, dotxy ); - bli_zaxpys( *alpha, *xp, *zp ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpys( z,z,z,z, *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -246,8 +246,8 @@ void bli_zdotaxpyv_template_noopt // Compute tail edge cases, if applicable. 
for ( i = 0; i < n_left; ++i ) { - bli_zdots( *xp, *yp, dotxy ); - bli_zaxpys( *alpha, *xp, *zp ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpys( z,z,z,z, *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -257,8 +257,8 @@ void bli_zdotaxpyv_template_noopt // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); - bli_zaxpys( *alpha, *xp, *zp ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpys( z,z,z,z, *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -270,8 +270,8 @@ void bli_zdotaxpyv_template_noopt // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); - bli_zaxpys( *alpha, *xp, *zp ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpys( z,z,z,z, *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -281,8 +281,8 @@ void bli_zdotaxpyv_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); - bli_zaxpys( *alpha, *xp, *zp ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpys( z,z,z,z, *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -292,8 +292,8 @@ void bli_zdotaxpyv_template_noopt // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zdots( *xp, *yp, dotxy ); - bli_zaxpyjs( *alpha, *xp, *zp ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -305,8 +305,8 @@ void bli_zdotaxpyv_template_noopt // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zdots( *xp, *yp, dotxy ); - bli_zaxpyjs( *alpha, *xp, *zp ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -316,8 +316,8 @@ void bli_zdotaxpyv_template_noopt // Compute tail edge cases, if applicable. 
for ( i = 0; i < n_left; ++i ) { - bli_zdots( *xp, *yp, dotxy ); - bli_zaxpyjs( *alpha, *xp, *zp ); + bli_tdots( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -327,8 +327,8 @@ void bli_zdotaxpyv_template_noopt // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); - bli_zaxpyjs( *alpha, *xp, *zp ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -340,8 +340,8 @@ void bli_zdotaxpyv_template_noopt // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); - bli_zaxpyjs( *alpha, *xp, *zp ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; @@ -351,8 +351,8 @@ void bli_zdotaxpyv_template_noopt // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { - bli_zdotjs( *xp, *yp, dotxy ); - bli_zaxpyjs( *alpha, *xp, *zp ); + bli_tdotjs( z,z,z,z, *xp, *yp, dotxy ); + bli_taxpyjs( z,z,z,z, *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } @@ -361,7 +361,7 @@ void bli_zdotaxpyv_template_noopt // If conjugation on y was requested, we induce it by conjugating // the contents of rho. 
if ( bli_is_conj( conjy ) ) - bli_zconjs( dotxy ); + bli_tconjs( z, dotxy ); bli_tcopys( z,z, dotxy, *rho ); } diff --git a/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c b/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c index 2667d92722..468647ff2c 100644 --- a/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c +++ b/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c @@ -239,7 +239,7 @@ void bli_zdotxaxpyf_template_noopt for ( j = 0; j < b_n; ++j ) { bli_tcopys( z,z, *xp[ j ], alpha_x[ j ] ); - bli_zscals( *alpha, alpha_x[ j ] ); + bli_tscals( z,z,z, *alpha, alpha_x[ j ] ); } } else // if ( bli_is_conj( conjx ) ) @@ -247,14 +247,14 @@ void bli_zdotxaxpyf_template_noopt for ( j = 0; j < b_n; ++j ) { bli_tcopyjs( z,z, *xp[ j ], alpha_x[ j ] ); - bli_zscals( *alpha, alpha_x[ j ] ); + bli_tscals( z,z,z, *alpha, alpha_x[ j ] ); } } // Initialize our accumulators to zero. for ( j = 0; j < b_n; ++j ) { - bli_zset0s( At_w[ j ] ); + bli_tset0s( z, At_w[ j ] ); } @@ -278,8 +278,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdots( *ap[ j ], *wp, At_w[ j ] ); - bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdots( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdots( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } @@ -295,8 +295,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdots( *ap[ j ], *wp, At_w[ j ] ); - bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdots( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdots( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } @@ -308,8 +308,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdots( *ap[ j ], *wp, At_w[ j ] ); - bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdots( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdots( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } @@ -323,8 +323,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - 
bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); - bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdotjs( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdots( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } @@ -340,8 +340,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); - bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdotjs( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdots( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } @@ -353,8 +353,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); - bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdotjs( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdots( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } @@ -368,8 +368,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdots( *ap[ j ], *wp, At_w[ j ] ); - bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdots( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdotjs( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } @@ -385,8 +385,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdots( *ap[ j ], *wp, At_w[ j ] ); - bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdots( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdotjs( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } @@ -398,8 +398,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdots( *ap[ j ], *wp, At_w[ j ] ); - bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdots( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdotjs( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } @@ -413,8 +413,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); - bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdotjs( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdotjs( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } @@ 
-430,8 +430,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); - bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdotjs( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdotjs( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } @@ -443,8 +443,8 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); - bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); + bli_tdotjs( z,z,z,z, *ap[ j ], *wp, At_w[ j ] ); + bli_tdotjs( z,z,z,z, *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } @@ -459,7 +459,7 @@ void bli_zdotxaxpyf_template_noopt { for ( j = 0; j < b_n; ++j ) { - bli_zconjs( At_w[ j ] ); + bli_tconjs( z, At_w[ j ] ); } } @@ -467,8 +467,8 @@ void bli_zdotxaxpyf_template_noopt // scaling by beta. for ( j = 0; j < b_n; ++j ) { - bli_zscals( *beta, *yp[ j ] ); - bli_zaxpys( *alpha, At_w[ j ], *yp[ j ] ); + bli_tscals( z,z,z, *beta, *yp[ j ] ); + bli_taxpys( z,z,z,z, *alpha, At_w[ j ], *yp[ j ] ); } } diff --git a/config/template/kernels/1f/bli_dotxf_template_noopt_var1.c b/config/template/kernels/1f/bli_dotxf_template_noopt_var1.c index 650303afe1..ac62ff9997 100644 --- a/config/template/kernels/1f/bli_dotxf_template_noopt_var1.c +++ b/config/template/kernels/1f/bli_dotxf_template_noopt_var1.c @@ -227,7 +227,7 @@ void bli_zdotxf_template_noopt // Initialize our accumulators to zero. 
for ( i = 0; i < b_n; ++i ) { - bli_zset0s( Atx[ i ] ); + bli_tset0s( z, Atx[ i ] ); } @@ -249,7 +249,7 @@ void bli_zdotxf_template_noopt { for ( i = 0; i < b_n; ++i ) { - bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); + bli_tdots( z,z,z,z, *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } @@ -264,7 +264,7 @@ void bli_zdotxf_template_noopt { for ( i = 0; i < b_n; ++i ) { - bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); + bli_tdots( z,z,z,z, *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += n_elem_per_iter; } @@ -276,7 +276,7 @@ void bli_zdotxf_template_noopt { for ( i = 0; i < b_n; ++i ) { - bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); + bli_tdots( z,z,z,z, *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } @@ -290,7 +290,7 @@ void bli_zdotxf_template_noopt { for ( i = 0; i < b_n; ++i ) { - bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); + bli_tdotjs( z,z,z,z, *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } @@ -305,7 +305,7 @@ void bli_zdotxf_template_noopt { for ( i = 0; i < b_n; ++i ) { - bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); + bli_tdotjs( z,z,z,z, *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += n_elem_per_iter; } @@ -317,7 +317,7 @@ void bli_zdotxf_template_noopt { for ( i = 0; i < b_n; ++i ) { - bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); + bli_tdotjs( z,z,z,z, *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } @@ -332,7 +332,7 @@ void bli_zdotxf_template_noopt { for ( i = 0; i < b_n; ++i ) { - bli_zconjs( Atx[ i ] ); + bli_tconjs( z, Atx[ i ] ); } } @@ -341,8 +341,8 @@ void bli_zdotxf_template_noopt // scaling by beta. 
for ( i = 0; i < b_n; ++i ) { - bli_zzscals( *beta, *yp[ i ] ); - bli_zzzaxpys( *alpha, Atx[ i ], *yp[ i ] ); + bli_tscals( z,z,z, *beta, *yp[ i ] ); + bli_taxpys( z,z,z,z, *alpha, Atx[ i ], *yp[ i ] ); } } diff --git a/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c b/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c index 0a963a2d8e..2688a7bc58 100644 --- a/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c +++ b/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c @@ -116,22 +116,22 @@ void bli_ztrsm_l_template_noopt gamma11 = c11 + (i )*rs_c + (j )*cs_c; /* chi11 = chi11 - a10t * x01; */ - bli_zset0s( rho11 ); + bli_tset0s( z, rho11 ); for ( l = 0; l < n_behind; ++l ) { alpha10 = a10t + (l )*cs_a; chi01 = x01 + (l )*rs_b; - bli_zaxpys( *alpha10, *chi01, rho11 ); + bli_taxpys( z,z,z,z, *alpha10, *chi01, rho11 ); } - bli_zsubs( rho11, *chi11 ); + bli_tsubs( z,z,z, rho11, *chi11 ); /* chi11 = chi11 / alpha11; */ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ - bli_zscals( *alpha11, *chi11 ); + bli_tscals( z,z,z, *alpha11, *chi11 ); /* Output final result to matrix C. 
*/ bli_tcopys( z,z, *chi11, *gamma11 ); diff --git a/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c b/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c index c65c5e3523..9d133b0371 100644 --- a/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c +++ b/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c @@ -116,22 +116,22 @@ void bli_ztrsm_u_template_noopt gamma11 = c11 + (i )*rs_c + (j )*cs_c; /* chi11 = chi11 - a12t * x21; */ - bli_zset0s( rho11 ); + bli_tset0s( z, rho11 ); for ( l = 0; l < n_behind; ++l ) { alpha12 = a12t + (l )*cs_a; chi21 = x21 + (l )*rs_b; - bli_zaxpys( *alpha12, *chi21, rho11 ); + bli_taxpys( z,z,z,z, *alpha12, *chi21, rho11 ); } - bli_zsubs( rho11, *chi11 ); + bli_tsubs( z,z,z, rho11, *chi11 ); /* chi11 = chi11 / alpha11; */ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ - bli_zscals( *alpha11, *chi11 ); + bli_tscals( z,z,z, *alpha11, *chi11 ); /* Output final result to matrix C. */ bli_tcopys( z,z, *chi11, *gamma11 ); diff --git a/frame/base/bli_obj.c b/frame/base/bli_obj.c index 0c22f1a131..0d0d3bf666 100644 --- a/frame/base/bli_obj.c +++ b/frame/base/bli_obj.c @@ -121,8 +121,8 @@ void bli_obj_create_without_buffer // scenarios. Failing to do this can lead to reading uninitialized // memory just before calling the macrokernel (as the internal scalars // for A and B are merged). 
- //if ( bli_is_float( dt ) ) { bli_sset1s( *(( float* )s) ); } - //else if ( bli_is_double( dt ) ) { bli_dset1s( *(( double* )s) ); } + //if ( bli_is_float( dt ) ) { bli_tset1s( s, *(( float* )s) ); } + //else if ( bli_is_double( dt ) ) { bli_tset1s( d, *(( double* )s) ); } if ( bli_is_float( dt ) ) { bli_tset1s( c, *(( scomplex* )s) ); } else if ( bli_is_double( dt ) ) { bli_tset1s( z, *(( dcomplex* )s) ); } else if ( bli_is_scomplex( dt ) ) { bli_tset1s( c, *(( scomplex* )s) ); } @@ -312,10 +312,10 @@ void bli_obj_free //temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, obj ); //temp_i = bli_obj_buffer_for_const( BLIS_INT, obj ); - bli_dssets( value, 0.0, *temp_s ); - bli_ddsets( value, 0.0, *temp_d ); - bli_dcsets( value, 0.0, *temp_c ); - bli_dzsets( value, 0.0, *temp_z ); + bli_tsets( d,s, value, 0.0, *temp_s ); + bli_tsets( d,d, value, 0.0, *temp_d ); + bli_tsets( d,c, value, 0.0, *temp_c ); + bli_tsets( d,z, value, 0.0, *temp_z ); *temp_i = ( gint_t ) value; } @@ -347,33 +347,33 @@ void bli_obj_free buf_a = bli_obj_buffer_at_off( a ); - bli_zzsets( 0.0, 0.0, value ); + bli_tsets( z,z, 0.0, 0.0, value ); if ( bli_obj_is_float( a ) ) { - bli_szcopys( *(( float* )buf_a), value ); + bli_tcopys( s,z, *(( float* )buf_a), value ); } else if ( bli_obj_is_double( a ) ) { - bli_dzcopys( *(( double* )buf_a), value ); + bli_tcopys( d,z, *(( double* )buf_a), value ); } else if ( bli_obj_is_scomplex( a ) ) { - bli_czcopys( *(( scomplex* )buf_a), value ); + bli_tcopys( c,z, *(( scomplex* )buf_a), value ); } else if ( bli_obj_is_dcomplex( a ) ) { - bli_zzcopys( *(( dcomplex* )buf_a), value ); + bli_tcopys( z,z, *(( dcomplex* )buf_a), value ); } else { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } - bli_zscopys( value, *temp_s ); - bli_zdcopys( value, *temp_d ); - bli_zccopys( value, *temp_c ); - bli_zzcopys( value, *temp_z ); + bli_tcopys( z,s, value, *temp_s ); + bli_tcopys( z,d, value, *temp_d ); + bli_tcopys( z,c, value, *temp_c ); + bli_tcopys( z,z, value, 
*temp_z ); *temp_i = ( gint_t ) bli_zreal( value ); } diff --git a/kernels/zen/1f/bli_axpyf_zen_int_4.c b/kernels/zen/1f/bli_axpyf_zen_int_4.c index e360c2ade3..72055fc702 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_4.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_4.c @@ -96,8 +96,8 @@ void bli_caxpyf_zen_int_4 scomplex* restrict y1 = y + (0 )*incy; scomplex alpha_chi1; - bli_ccopycjs( conjx, *chi1, alpha_chi1 ); - bli_cscals( *alpha, alpha_chi1 ); + bli_tcopycjs( c,c, conjx, *chi1, alpha_chi1 ); + bli_tscals( c,c,c, *alpha, alpha_chi1 ); f ( @@ -129,17 +129,17 @@ void bli_caxpyf_zen_int_4 const scomplex* restrict pchi2 = x + 2*incx ; const scomplex* restrict pchi3 = x + 3*incx ; - bli_ccopycjs( conjx, *pchi0, chi0 ); - bli_ccopycjs( conjx, *pchi1, chi1 ); - bli_ccopycjs( conjx, *pchi2, chi2 ); - bli_ccopycjs( conjx, *pchi3, chi3 ); + bli_tcopycjs( c,c, conjx, *pchi0, chi0 ); + bli_tcopycjs( c,c, conjx, *pchi1, chi1 ); + bli_tcopycjs( c,c, conjx, *pchi2, chi2 ); + bli_tcopycjs( c,c, conjx, *pchi3, chi3 ); } // Scale each chi scalar by alpha. - bli_cscals( *alpha, chi0 ); - bli_cscals( *alpha, chi1 ); - bli_cscals( *alpha, chi2 ); - bli_cscals( *alpha, chi3 ); + bli_tscals( c,c,c, *alpha, chi0 ); + bli_tscals( c,c,c, *alpha, chi1 ); + bli_tscals( c,c,c, *alpha, chi2 ); + bli_tscals( c,c,c, *alpha, chi3 ); lda *= 2; incx *= 2; diff --git a/kernels/zen/1f/bli_axpyf_zen_int_5.c b/kernels/zen/1f/bli_axpyf_zen_int_5.c index 5f21cfefa3..9ef7f34639 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_5.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_5.c @@ -116,8 +116,8 @@ void bli_saxpyf_zen_int_5 float* restrict y1 = y + (0 )*incy; float alpha_chi1; - bli_scopycjs( conjx, *chi1, alpha_chi1 ); - bli_sscals( *alpha, alpha_chi1 ); + bli_tcopycjs( s,s, conjx, *chi1, alpha_chi1 ); + bli_tscals( s,s,s, *alpha, alpha_chi1 ); f ( @@ -150,11 +150,11 @@ void bli_saxpyf_zen_int_5 // Scale each chi scalar by alpha. 
- bli_sscals( *alpha, chi0 ); - bli_sscals( *alpha, chi1 ); - bli_sscals( *alpha, chi2 ); - bli_sscals( *alpha, chi3 ); - bli_sscals( *alpha, chi4 ); + bli_tscals( s,s,s, *alpha, chi0 ); + bli_tscals( s,s,s, *alpha, chi1 ); + bli_tscals( s,s,s, *alpha, chi2 ); + bli_tscals( s,s,s, *alpha, chi3 ); + bli_tscals( s,s,s, *alpha, chi4 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_ss( &chi0 ); @@ -365,8 +365,8 @@ void bli_daxpyf_zen_int_5 double* restrict y1 = y + (0 )*incy; double alpha_chi1; - bli_dcopycjs( conjx, *chi1, alpha_chi1 ); - bli_dscals( *alpha, alpha_chi1 ); + bli_tcopycjs( d,d, conjx, *chi1, alpha_chi1 ); + bli_tscals( d,d,d, *alpha, alpha_chi1 ); f ( @@ -399,11 +399,11 @@ void bli_daxpyf_zen_int_5 // Scale each chi scalar by alpha. - bli_dscals( *alpha, chi0 ); - bli_dscals( *alpha, chi1 ); - bli_dscals( *alpha, chi2 ); - bli_dscals( *alpha, chi3 ); - bli_dscals( *alpha, chi4 ); + bli_tscals( d,d,d, *alpha, chi0 ); + bli_tscals( d,d,d, *alpha, chi1 ); + bli_tscals( d,d,d, *alpha, chi2 ); + bli_tscals( d,d,d, *alpha, chi3 ); + bli_tscals( d,d,d, *alpha, chi4 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_sd( &chi0 ); @@ -617,8 +617,8 @@ void bli_daxpyf_zen_int_16x2 double* restrict y1 = y + (0 )*incy; double alpha_chi1; - bli_dcopycjs( conjx, *chi1, alpha_chi1 ); - bli_dscals( *alpha, alpha_chi1 ); + bli_tcopycjs( d,d, conjx, *chi1, alpha_chi1 ); + bli_tscals( d,d,d, *alpha, alpha_chi1 ); f ( @@ -646,8 +646,8 @@ void bli_daxpyf_zen_int_16x2 // Scale each chi scalar by alpha. - bli_dscals( *alpha, chi0 ); - bli_dscals( *alpha, chi1 ); + bli_tscals( d,d,d, *alpha, chi0 ); + bli_tscals( d,d,d, *alpha, chi1 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. 
chi0v.v = _mm256_broadcast_sd( &chi0 ); @@ -905,8 +905,8 @@ void bli_daxpyf_zen_int_16x4 double* restrict y1 = y + (0 )*incy; double alpha_chi1; - bli_dcopycjs( conjx, *chi1, alpha_chi1 ); - bli_dscals( *alpha, alpha_chi1 ); + bli_tcopycjs( d,d, conjx, *chi1, alpha_chi1 ); + bli_tscals( d,d,d, *alpha, alpha_chi1 ); f ( @@ -937,10 +937,10 @@ void bli_daxpyf_zen_int_16x4 chi3 = *( x + 3*incx ); // Scale each chi scalar by alpha. - bli_dscals( *alpha, chi0 ); - bli_dscals( *alpha, chi1 ); - bli_dscals( *alpha, chi2 ); - bli_dscals( *alpha, chi3 ); + bli_tscals( d,d,d, *alpha, chi0 ); + bli_tscals( d,d,d, *alpha, chi1 ); + bli_tscals( d,d,d, *alpha, chi2 ); + bli_tscals( d,d,d, *alpha, chi3 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_sd( &chi0 ); diff --git a/kernels/zen/3/bli_gemmt_small.c b/kernels/zen/3/bli_gemmt_small.c index 619aa0fff4..546ba7906a 100644 --- a/kernels/zen/3/bli_gemmt_small.c +++ b/kernels/zen/3/bli_gemmt_small.c @@ -1603,7 +1603,7 @@ static err_t bli_sgemmt_small } while (_i < M ) { - bli_sscopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } @@ -1616,7 +1616,7 @@ static err_t bli_sgemmt_small k = (k <= M) ? 
k : M; for ( _i = _j; _i < k; ++_i ) { - bli_sscopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } k = (M - _i) >> 3; @@ -1631,7 +1631,7 @@ static err_t bli_sgemmt_small } while (_i < M ) { - bli_sscopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } @@ -1654,7 +1654,7 @@ static err_t bli_sgemmt_small } while (_i <= _j ) { - bli_sscopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; } @@ -1681,7 +1681,7 @@ static err_t bli_sgemmt_small } while (_i < M ) { - bli_sssxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; @@ -1695,7 +1695,7 @@ static err_t bli_sgemmt_small k = (k <= M) ? k : M; for ( _i = _j; _i < k; ++_i ) { - bli_sssxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -1713,7 +1713,7 @@ static err_t bli_sgemmt_small } while (_i < M ) { - bli_sssxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; @@ -1739,7 +1739,7 @@ static err_t bli_sgemmt_small } while (_i <= _j ) { - bli_sssxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; @@ -3173,7 +3173,7 @@ static err_t bli_dgemmt_small } while (_i < M ) { - bli_ddcopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } @@ -3186,7 +3186,7 @@ static err_t bli_dgemmt_small k = (k <= M) ? 
k : M; for ( _i = _j; _i < k; ++_i ) { - bli_ddcopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } k = (M - _i) >> 2; @@ -3201,7 +3201,7 @@ static err_t bli_dgemmt_small } while (_i < M ) { - bli_ddcopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } @@ -3224,7 +3224,7 @@ static err_t bli_dgemmt_small } while (_i <= _j ) { - bli_ddcopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; } @@ -3251,7 +3251,7 @@ static err_t bli_dgemmt_small } while (_i < M ) { - bli_dddxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; @@ -3265,7 +3265,7 @@ static err_t bli_dgemmt_small k = (k <= M) ? k : M; for ( _i = _j; _i < k; ++_i ) { - bli_dddxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -3283,7 +3283,7 @@ static err_t bli_dgemmt_small } while (_i < M ) { - bli_dddxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; @@ -3309,7 +3309,7 @@ static err_t bli_dgemmt_small } while (_i <= _j ) { - bli_dddxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; @@ -3726,7 +3726,7 @@ static err_t bli_sgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { - bli_sscopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } @@ -3736,7 +3736,7 @@ static err_t bli_sgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { - bli_sscopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) 
); } } @@ -3750,7 +3750,7 @@ static err_t bli_sgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { - bli_sssxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -3761,7 +3761,7 @@ static err_t bli_sgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { - bli_sssxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -4158,7 +4158,7 @@ static err_t bli_dgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { - bli_ddcopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } @@ -4168,7 +4168,7 @@ static err_t bli_dgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { - bli_ddcopys( *(C + _i*rsc + _j*ldc), + bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } @@ -4182,7 +4182,7 @@ static err_t bli_dgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { - bli_dddxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -4193,7 +4193,7 @@ static err_t bli_dgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { - bli_dddxpbys( *(C + _i*rsc + _j*ldc), + bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } diff --git a/vendor/testcpp/test_sdsdot.cc b/vendor/testcpp/test_sdsdot.cc index c903c97d33..230a2fc4bb 100644 --- a/vendor/testcpp/test_sdsdot.cc +++ b/vendor/testcpp/test_sdsdot.cc @@ -45,7 +45,7 @@ using namespace std; /* * Test application assumes matrices to be column major, non-transposed */ - + #if 0 template< typename T > void ref_sdsot(int64_t n, @@ -61,7 +61,7 @@ void ref_sdsot(int64_t n, obj_t obj_res; 
obj_t obj_alpha; num_t dt; - + if(is_same>::value) dt = BLIS_SCOMPLEX; else if(is_same>::value) @@ -72,10 +72,10 @@ void ref_sdsot(int64_t n, bli_obj_create_with_attached_buffer( dt, 1, 1, &alpha, 1,1,&obj_alpha ); bli_obj_create_with_attached_buffer( dt, 1, 1, res_ref, 1, 1,&obj_res ); - bli_ddots( &obj_x, + bli_tdots( d,d,d,d, &obj_x, &obj_y, &obj_res ); - + } #endif @@ -113,7 +113,7 @@ void test_sdsdot() printf("Dot product = %E \n", res); #endif - //ref_sdsot(n, aplha, X, Y , &res_ref ); + //ref_sdsot(n, aplha, X, Y , &res_ref ); #ifdef PRINT printf("Ref Dot product %E \n", res_ref); From 6970f231bfedeac8f28791aa6550c58e4afbdb6b Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sun, 3 Nov 2024 21:23:05 -0600 Subject: [PATCH 05/19] Make it easier to override CXXLANGFLAGS. --- common.mk | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/common.mk b/common.mk index 5da49eef2c..80871e33e9 100644 --- a/common.mk +++ b/common.mk @@ -63,7 +63,6 @@ $(eval $(call store-var-for,CC, $(1))) $(eval $(call store-var-for,CC_VENDOR, $(1))) $(eval $(call store-var-for,CPPROCFLAGS,$(1))) $(eval $(call store-var-for,CLANGFLAGS, $(1))) -$(eval $(call store-var-for,CXXLANGFLAGS,$(1))) $(eval $(call store-var-for,CMISCFLAGS, $(1))) $(eval $(call store-var-for,CPICFLAGS, $(1))) $(eval $(call store-var-for,CWARNFLAGS, $(1))) @@ -109,8 +108,8 @@ get-noopt-cxxflags-for = $(strip $(CXXFLAGS_PRESET) \ $(call load-var-for,CWARNFLAGS,$(1)) \ $(call load-var-for,CPICFLAGS,$(1)) \ $(call load-var-for,CMISCFLAGS,$(1)) \ - $(call load-var-for,CXXLANGFLAGS,$(1)) \ $(call load-var-for,CPPROCFLAGS,$(1)) \ + $(CXXLANGFLAGS) \ $(CTHREADFLAGS) \ $(CXXTHREADFLAGS) \ $(CINCFLAGS) \ @@ -914,7 +913,6 @@ endif else CXXLANGFLAGS := endif -$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CXXLANGFLAGS,$(c)))) # --- C Preprocessor flags --- From f527dccc2762d920daaa0cbcbdd25ef5b7962032 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sun, 3 Nov 2024 21:23:21 -0600 Subject: 
[PATCH 06/19] Fix some bugs in the new level-0 macros. --- frame/include/level0/bli_tadd3s.h | 21 ++++++++++++++------- frame/include/level0/bli_tcopys.h | 4 ++-- frame/include/level0/bli_tscal2s.h | 18 +++++++++--------- frame/include/level0/bli_tscalcjs.h | 8 ++++---- frame/include/level0/bli_tscals.h | 6 +++--- frame/include/level0/bli_tsets.h | 28 ++++++++++++++-------------- frame/include/level0/bli_tsqrt2s.h | 6 ++++-- 7 files changed, 50 insertions(+), 41 deletions(-) diff --git a/frame/include/level0/bli_tadd3s.h b/frame/include/level0/bli_tadd3s.h index d3c700d699..43e090fbaf 100644 --- a/frame/include/level0/bli_tadd3s.h +++ b/frame/include/level0/bli_tadd3s.h @@ -90,7 +90,6 @@ PASTEMAC(chc,prec) \ ) -// tadd3s unit test #undef GENTFUNC #define GENTFUNC( ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc, opname ) \ UNIT_TEST(chx,chy,chz,chc,opname) \ @@ -137,9 +136,9 @@ UNIT_TEST(chx,chy,chz,chc,opname) \ // -- Exposed real/imaginary -- -// taddris -#define bli_taddris( chx, chy, chc, xr, xi, yr, yi ) \ - bli_taddims \ +// tadd3ris +#define bli_tadd3ris( chx, chy, chz, chc, xr, xi, yr, yi, zr, zi ) \ + bli_tadd3ims \ ( \ PASTEMAC(chx,dom), \ PASTEMAC(chx,prec), \ @@ -149,12 +148,16 @@ UNIT_TEST(chx,chy,chz,chc,opname) \ PASTEMAC(chy,prec), \ yr, \ yi, \ + PASTEMAC(chz,dom), \ + PASTEMAC(chz,prec), \ + zr, \ + zi, \ PASTEMAC(chc,prec) \ ) -// taddjris -#define bli_taddjris( chx, chy, chc, xr, xi, yr, yi ) \ - bli_taddims \ +// tadd3jris +#define bli_tadd3jris( chx, chy, chz, chc, xr, xi, yr, yi, zr, zi ) \ + bli_tadd3ims \ ( \ PASTEMAC(chx,dom), \ PASTEMAC(chx,prec), \ @@ -165,6 +168,10 @@ UNIT_TEST(chx,chy,chz,chc,opname) \ PASTEMAC(chy,prec), \ yr, \ yi, \ + PASTEMAC(chz,dom), \ + PASTEMAC(chz,prec), \ + zr, \ + zi, \ PASTEMAC(chc,prec) \ ) diff --git a/frame/include/level0/bli_tcopys.h b/frame/include/level0/bli_tcopys.h index e65403787d..188db13068 100644 --- a/frame/include/level0/bli_tcopys.h +++ b/frame/include/level0/bli_tcopys.h @@ -129,7 +129,7 
@@ PASTEMAC(chx,dom), \ PASTEMAC(chx,prec), \ PASTEMAC(chx,real)(x), \ - PASTEMAC(chx,imag)(x) \ + PASTEMAC(chx,imag)(x), \ PASTEMAC(chy,dom), \ PASTEMAC(chy,prec), \ PASTEMAC(chy,real)(yri), \ @@ -183,7 +183,7 @@ PASTEMAC(chx,dom), \ PASTEMAC(chx,prec), \ PASTEMAC(chx,real)(x), \ - PASTEMAC(chx,imag)(x) \ + PASTEMAC(chx,imag)(x), \ PASTEMAC(chy,dom), \ PASTEMAC(chy,prec), \ yr, \ diff --git a/frame/include/level0/bli_tscal2s.h b/frame/include/level0/bli_tscal2s.h index 1f42cef9da..5dc4e25e5b 100644 --- a/frame/include/level0/bli_tscal2s.h +++ b/frame/include/level0/bli_tscal2s.h @@ -363,11 +363,11 @@ const inc_t ldy2 = 2 * (ldy); \ \ ctypea_r* restrict alpha_r = ( ctypea_r* )(alpha); \ - ctypea_r* restrict alpha_i = ( ctypea_r* )(alpha) + 1; \ + ctypea_r* restrict alpha_i = ( ctypea_r* )(alpha) + 1; (void)alpha_i; \ ctypex_r* restrict chi_r = ( ctypex_r* )(x); \ - ctypex_r* restrict chi_i = ( ctypex_r* )(x) + 1; \ + ctypex_r* restrict chi_i = ( ctypex_r* )(x) + 1; (void)chi_i; \ ctypey_r* restrict psi_r = ( ctypey_r* )(y); \ - ctypey_r* restrict psi_i = ( ctypey_r* )(y) + 1*d; \ + ctypey_r* restrict psi_i = ( ctypey_r* )(y) + 1*d; (void)psi_i; \ \ if ( bli_is_conj( conjx ) ) \ { \ @@ -381,9 +381,9 @@ for ( dim_t i = 0; i < (m); ++i ) \ { \ ctypex_r* restrict chiij_r = chij_r + i*incx2; \ - ctypex_r* restrict chiij_i = chij_i + i*incx2; \ + ctypex_r* restrict chiij_i = chij_i + i*incx2; (void)chiij_i; \ ctypey_r* restrict psiij_r = psij_r + i*incy2; \ - ctypey_r* restrict psiij_i = psij_i + i*incy2; \ + ctypey_r* restrict psiij_i = psij_i + i*incy2; (void)psiij_i; \ \ bli_tscal2jris( cha,chx,chy,chc, \ *alpha_r, *alpha_i, \ @@ -393,7 +393,7 @@ for ( dim_t p = 1; p < d; ++p ) \ { \ ctypey_r* restrict psiijd_r = psiij_r + p*ds_y; \ - ctypey_r* restrict psiijd_i = psiij_i + p*ds_y; \ + ctypey_r* restrict psiijd_i = psiij_i + p*ds_y; (void)psiijd_i; \ \ bli_tcopyris( chy,chy, *psiij_r, *psiij_i, \ *psiijd_r, *psiijd_i ); \ @@ -413,9 +413,9 @@ for ( dim_t i = 0; i < 
(m); ++i ) \ { \ ctypex_r* restrict chiij_r = chij_r + i*incx2; \ - ctypex_r* restrict chiij_i = chij_i + i*incx2; \ + ctypex_r* restrict chiij_i = chij_i + i*incx2; (void)chiij_i; \ ctypey_r* restrict psiij_r = psij_r + i*incy2; \ - ctypey_r* restrict psiij_i = psij_i + i*incy2; \ + ctypey_r* restrict psiij_i = psij_i + i*incy2; (void)psiij_i; \ \ bli_tscal2ris( cha,chx,chy,chc, \ *alpha_r, *alpha_i, \ @@ -425,7 +425,7 @@ for ( dim_t p = 1; p < d; ++p ) \ { \ ctypey_r* restrict psiijd_r = psiij_r + p*ds_y; \ - ctypey_r* restrict psiijd_i = psiij_i + p*ds_y; \ + ctypey_r* restrict psiijd_i = psiij_i + p*ds_y; (void)psiijd_i; \ \ bli_tcopyris( chy,chy, *psiij_r, *psiij_i, \ *psiijd_r, *psiijd_i ); \ diff --git a/frame/include/level0/bli_tscalcjs.h b/frame/include/level0/bli_tscalcjs.h index b526aa0c61..8f2efaa0b5 100644 --- a/frame/include/level0/bli_tscalcjs.h +++ b/frame/include/level0/bli_tscalcjs.h @@ -64,7 +64,7 @@ PASTEMAC(da,dx,termii)( \ chc, \ PASTEMAC(chc,mul)( \ - ( bli_is_conj( conj ) ? PASTEMAC(pa,neg)( \ + ( bli_is_conj( conj ) ? PASTEMAC(chc,neg)( \ PASTEMAC(pa,chc,tcast)(ai) \ ) \ : PASTEMAC(pa,chc,tcast)(ai) \ @@ -77,7 +77,7 @@ PASTEMAC(da,dx,termir)( \ chc, \ PASTEMAC(chc,mul)( \ - ( bli_is_conj( conj ) ? PASTEMAC(pa,neg)( \ + ( bli_is_conj( conj ) ? 
PASTEMAC(chc,neg)( \ PASTEMAC(pa,chc,tcast)(ai) \ ) \ : PASTEMAC(pa,chc,tcast)(ai) \ @@ -98,8 +98,8 @@ ) \ PASTEMAC(dx,assigns) \ ( \ - tr, \ - ti, \ + PASTEMAC(chc,px,tcast)(tr), \ + PASTEMAC(chc,px,tcast)(ti), \ xr, \ xi \ ); \ diff --git a/frame/include/level0/bli_tscals.h b/frame/include/level0/bli_tscals.h index 709860c986..eaa2d9b986 100644 --- a/frame/include/level0/bli_tscals.h +++ b/frame/include/level0/bli_tscals.h @@ -89,8 +89,8 @@ ) \ PASTEMAC(dx,assigns) \ ( \ - tr, \ - ti, \ + PASTEMAC(chc,px,tcast)(tr), \ + PASTEMAC(chc,px,tcast)(ti), \ xr, \ xi \ ); \ @@ -195,7 +195,7 @@ PASTEMAC(chx,imag)(xir) \ ) -// scal1es +// scal1rs #define bli_tscal1rs( cha, chx, chc, a, xr, xi ) \ bli_tscalims \ ( \ diff --git a/frame/include/level0/bli_tsets.h b/frame/include/level0/bli_tsets.h index a97b70379f..30ef235719 100644 --- a/frame/include/level0/bli_tsets.h +++ b/frame/include/level0/bli_tsets.h @@ -221,17 +221,17 @@ \ for ( dim_t _j = 0; _j < (n); ++_j ) \ { \ - PASTEMAC(chy,ctype)* restrict yj = (PASTEMAC(chy,ctype)*)(y) + _j*(ldy); \ + PASTEMAC(chy,ctype)* restrict _yj = (PASTEMAC(chy,ctype)*)(y) + _j*(ldy); \ \ for ( dim_t _i = 0; _i < (m); ++_i ) \ { \ - PASTEMAC(chy,ctype)* restrict yij = yj + _i*(incy); \ + PASTEMAC(chy,ctype)* restrict _yij = _yj + _i*(incy); \ \ for ( dim_t _p = 0; _p < _d; ++_p ) \ { \ - PASTEMAC(chy,ctype)* restrict yijd = yij + _p*_ds_y; \ + PASTEMAC(chy,ctype)* restrict _yijd = _yij + _p*_ds_y; \ \ - bli_tset0s( chy, *yijd ); \ + bli_tset0s( chy, *_yijd ); \ } \ } \ } \ @@ -246,19 +246,19 @@ \ for ( dim_t _j = 0; _j < (n); ++_j ) \ { \ - PASTEMAC(chy,ctype)* restrict yj = (PASTEMAC(chy,ctype)*)(y) + _j*(ldy); \ + PASTEMAC(chy,ctype)* restrict _yj = (PASTEMAC(chy,ctype)*)(y) + _j*(ldy); \ \ for ( dim_t _i = 0; _i < (m); ++_i ) \ { \ - PASTEMAC(chy,ctyper)* restrict yij_r = (PASTEMAC(chy,ctyper)*)( (PASTEMAC(chy,ctype)*)yj + _i*(incy) ); \ - PASTEMAC(chy,ctyper)* restrict yij_i = yij_r + (incy); \ + PASTEMAC(chy,ctyper)* restrict 
_yij_r = (PASTEMAC(chy,ctyper)*)( _yj + _i*(incy) ); \ + PASTEMAC(chy,ctyper)* restrict _yij_i = _yij_r + (incy); \ \ for ( dim_t _p = 1; _p < _d; ++_p ) \ { \ - PASTEMAC(chy,ctyper)* restrict yijd_r = yij_r + _p*_ds_y; \ - PASTEMAC(chy,ctyper)* restrict yijd_i = yij_i + _p*_ds_y; (void)yijd_i; \ + PASTEMAC(chy,ctyper)* restrict _yijd_r = _yij_r + _p*_ds_y; \ + PASTEMAC(chy,ctyper)* restrict _yijd_i = _yij_i + _p*_ds_y; (void)_yijd_i; \ \ - bli_tcopyris( chy,chy, *yij_r, *yij_i, *yijd_r, *yijd_i ); \ + bli_tcopyris( chy,chy, *_yij_r, *_yij_i, *_yijd_r, *_yijd_i ); \ } \ } \ } \ @@ -271,9 +271,9 @@ bli_tset0s_mxn \ ( \ chp, \ - m - i, \ + (m) - (i), \ j, \ - p + (i)*1, 1, ldp \ + (p) + (i)*1, 1, ldp \ ); \ } \ \ @@ -283,8 +283,8 @@ ( \ chp, \ m, \ - n - j, \ - p + (j)*(ldp), 1, ldp \ + (n) - (j), \ + (p) + (j)*(ldp), 1, ldp \ ); \ } \ } diff --git a/frame/include/level0/bli_tsqrt2s.h b/frame/include/level0/bli_tsqrt2s.h index 9576683d13..a66384e3f9 100644 --- a/frame/include/level0/bli_tsqrt2s.h +++ b/frame/include/level0/bli_tsqrt2s.h @@ -65,8 +65,10 @@ { \ PASTEMAC(dy,assigns) \ ( \ - PASTEMAC(chc,sqrt)( \ - PASTEMAC(px,chc,tcast)( xr ) \ + PASTEMAC(chc,py,tcast)( \ + PASTEMAC(chc,sqrt)( \ + PASTEMAC(px,chc,tcast)( xr ) \ + ) \ ), \ PASTEMAC(py,zero), \ yr, \ From 662c162f8a36966aafabcb3020567b5132bf64aa Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sun, 3 Nov 2024 21:23:35 -0600 Subject: [PATCH 07/19] Add testsuite for new level-0 macros (C++ based). 
--- test/level0/Makefile | 57 +- test/level0/test_l0.cxx | 3128 +---------------- .../{bli_unit_testing.h => test_l0.hpp} | 88 +- test/level0/test_tabsq2s.cxx | 86 + test/level0/test_tabval2s.cxx | 109 + test/level0/test_tadd3s.cxx | 148 + test/level0/test_tadds.cxx | 196 ++ test/level0/test_taxpbys.cxx | 228 ++ test/level0/test_taxpys.cxx | 156 + test/level0/test_tconjs.cxx | 84 + test/level0/test_tcopycjs.cxx | 92 + test/level0/test_tcopynzs.cxx | 150 + test/level0/test_tcopys.cxx | 280 ++ test/level0/test_tdots.cxx | 44 + test/level0/test_teqs.cxx | 218 ++ test/level0/test_tfprints.cxx | 44 + test/level0/test_tgets.cxx | 71 + test/level0/test_tinverts.cxx | 141 + test/level0/test_tinvscals.cxx | 146 + test/level0/test_tneg2s.cxx | 88 + test/level0/test_trandnp2s.cxx | 44 + test/level0/test_trands.cxx | 44 + test/level0/test_tscal2s.cxx | 511 +++ test/level0/test_tscalcjs.cxx | 68 + test/level0/test_tscals.cxx | 277 ++ test/level0/test_tsets.cxx | 361 ++ test/level0/test_tsqrt2s.cxx | 86 + test/level0/test_tsubs.cxx | 142 + test/level0/test_tswaps.cxx | 98 + test/level0/test_txpbys.cxx | 286 ++ 30 files changed, 4286 insertions(+), 3185 deletions(-) rename test/level0/{bli_unit_testing.h => test_l0.hpp} (91%) create mode 100644 test/level0/test_tabsq2s.cxx create mode 100644 test/level0/test_tabval2s.cxx create mode 100644 test/level0/test_tadd3s.cxx create mode 100644 test/level0/test_tadds.cxx create mode 100644 test/level0/test_taxpbys.cxx create mode 100644 test/level0/test_taxpys.cxx create mode 100644 test/level0/test_tconjs.cxx create mode 100644 test/level0/test_tcopycjs.cxx create mode 100644 test/level0/test_tcopynzs.cxx create mode 100644 test/level0/test_tcopys.cxx create mode 100644 test/level0/test_tdots.cxx create mode 100644 test/level0/test_teqs.cxx create mode 100644 test/level0/test_tfprints.cxx create mode 100644 test/level0/test_tgets.cxx create mode 100644 test/level0/test_tinverts.cxx create mode 100644 test/level0/test_tinvscals.cxx create 
mode 100644 test/level0/test_tneg2s.cxx create mode 100644 test/level0/test_trandnp2s.cxx create mode 100644 test/level0/test_trands.cxx create mode 100644 test/level0/test_tscal2s.cxx create mode 100644 test/level0/test_tscalcjs.cxx create mode 100644 test/level0/test_tscals.cxx create mode 100644 test/level0/test_tsets.cxx create mode 100644 test/level0/test_tsqrt2s.cxx create mode 100644 test/level0/test_tsubs.cxx create mode 100644 test/level0/test_tswaps.cxx create mode 100644 test/level0/test_txpbys.cxx diff --git a/test/level0/Makefile b/test/level0/Makefile index c07dce37c4..70ead01de8 100644 --- a/test/level0/Makefile +++ b/test/level0/Makefile @@ -1,6 +1,6 @@ #!/bin/bash # -# BLIS +# BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # @@ -36,7 +36,7 @@ # Makefile # # Field G. Van Zee -# +# # Makefile for standalone BLIS test drivers. # @@ -49,7 +49,6 @@ clean cleanx - # # --- Determine makefile fragment location ------------------------------------- # @@ -63,6 +62,7 @@ LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else +CONFIG_NAME := $(shell grep -E "CONFIG_NAME *:=" ../../config.mk | sed 's/.*:= *//') DIST_PATH := ../.. LIB_PATH = ../../lib/$(CONFIG_NAME) INC_PATH = ../../include/$(CONFIG_NAME) @@ -70,7 +70,6 @@ SHARE_PATH := ../.. endif - # # --- Include common makefile definitions -------------------------------------- # @@ -79,7 +78,6 @@ endif -include $(SHARE_PATH)/common.mk - # # --- General build definitions ------------------------------------------------ # @@ -88,56 +86,51 @@ TEST_SRC_PATH := . TEST_OBJ_PATH := . # Gather all local object files. 
-TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \ - $(TEST_OBJ_PATH)/%.o, \ - $(wildcard $(TEST_SRC_PATH)/*.c))) +SRC_SUFFIXES := c cxx cpp +TEST_OBJS := $(foreach suf, \ + $(SRC_SUFFIXES), \ + $(sort $(patsubst $(TEST_SRC_PATH)/%.$(suf), \ + $(TEST_OBJ_PATH)/%.o, \ + $(wildcard $(TEST_SRC_PATH)/*.$(suf))))) # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) +CXXINCFLAGS := -I$(INC_PATH) +CXXLANGFLAGS := -std=c++17 # Use the CFLAGS for the configuration family. -#CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) -CFLAGS := $(strip -mavx2 -mfma -mfpmath=sse -march=haswell \ - -funsafe-math-optimizations \ - $(call get-user-cflags-for,$(CONFIG_NAME))) -# -funsafe-math-optimizations -ffp-contract=fast +CFLAGS := $(call get-frame-cflags-for,$(CONFIG_NAME)) +CXXFLAGS := $(call get-frame-cxxflags-for,$(CONFIG_NAME)) # Add installed and local header paths to CFLAGS CFLAGS += -I$(TEST_SRC_PATH) +CXXFLAGS += -I$(TEST_SRC_PATH) -HEADERS := $(wildcard $(TEST_SRC_PATH)/*.h) - -# Locate the libblis library to which we will link. 
-#LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) - +HDR_SUFFIXES := h hpp +HEADERS := $(foreach suf, $(HDR_SUFFIXES), $(wildcard $(TEST_SRC_PATH)/*.$(suf))) # # --- Targets/rules ------------------------------------------------------------ # -all: test asm - -asm: test_l0.s +all: test test: test_l0.x - -# -- Object file rules -- - -$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c - $(CC) $(CFLAGS) -c $< -o $@ - # -- Source file rules -- test_%.o: test_%.c Makefile $(HEADERS) $(CC) $(CFLAGS) -c $< -o $@ -test_%.s: test_%.c Makefile $(HEADERS) - $(CC) -S $(CFLAGS) -c $< -o $@ +test_%.o: test_%.cpp Makefile $(HEADERS) + $(CXX) $(CXXFLAGS) -c $< -o $@ + +test_%.o: test_%.cxx Makefile $(HEADERS) + $(CXX) $(CXXFLAGS) -c $< -o $@ # -- Executable file rules -- @@ -147,8 +140,8 @@ test_%.s: test_%.c Makefile $(HEADERS) # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. -test_l0.x: test_l0.o $(LIBBLIS_LINK) - $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ +test_l0.x: $(TEST_OBJS) $(LIBBLIS_LINK) + $(CXX) $(TEST_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ # -- Clean rules -- @@ -156,5 +149,5 @@ test_l0.x: test_l0.o $(LIBBLIS_LINK) clean: cleanx cleanx: - - $(RM_F) *.o *.x test_l0.s + - $(RM_F) *.o *.x diff --git a/test/level0/test_l0.cxx b/test/level0/test_l0.cxx index a9ad384eb1..a461d44a22 100644 --- a/test/level0/test_l0.cxx +++ b/test/level0/test_l0.cxx @@ -33,3135 +33,9 @@ */ -#include "blis.h" -#include "bli_unit_testing.h" - -#include -#include -#include -#include +#include "test_l0.hpp" int main() { get_unit_test_registrar().run_tests(); } - -/****************************************************************************** - * - * absq2s - * - *****************************************************************************/ - -// tabsq2s unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - 
auto y0 = convert( norm( convert_prec( x ) ) ); \ -\ - ctypey y; \ - bli_tabsq2s( chx,chy,chc, x, y ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, absq2s ) - -// tabsq2ris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( norm( convert_prec( x ) ) ); \ -\ - ctypey y; \ - bli_tabsq2ris( chx,chy,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, absq2ris ) - -/****************************************************************************** - * - * abval2s - * - *****************************************************************************/ - -// tabval2s unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( absolute( convert_prec( x ) ) ); \ -\ - ctypey y; \ - bli_tabval2s( chx,chy,chc, x, y ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, abval2s ) - -// tabval2ris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( absolute( convert_prec( x ) ) ); \ -\ - ctypey y; \ - bli_tabval2ris( chx,chy,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, abval2ris ) - -#undef GENTFUNC -#define 
GENTFUNC(ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc, opname ) \ -UNIT_TEST(chx,chy,chz,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto z0 = convert( convert_prec( x ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y: " << y ); \ -\ - ctypez z; \ - bli_tadd3s( chx,chy,chz,chc, x, y, z ); \ -\ - INFO( "z (C++): " << z0 ); \ - INFO( "z (BLIS): " << z ); \ -\ - check( z, z0 ); \ - } \ -) - -/****************************************************************************** - * - * add3s - * - *****************************************************************************/ - -INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3s); - -#undef GENTFUNC -#define GENTFUNC(ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc, opname ) \ -UNIT_TEST(chx,chy,chz,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto z0 = convert( conj( convert_prec( x ) ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y: " << y ); \ -\ - ctypez z; \ - bli_tadd3js( chx,chy,chz,chc, x, y, z ); \ -\ - INFO( "z (C++): " << z0 ); \ - INFO( "z (BLIS): " << z ); \ -\ - check( z, z0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3js); - -// tadd3ris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto z0 = convert( convert_prec( x ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y: " << y ); \ -\ - ctypez z; \ - bli_tadd3ris( chx,chy,chz,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ), \ - real( z ), imag( z ) ); \ -\ - INFO( "z (C++): " << z0 ); \ - INFO( "z (BLIS): " << z ); \ -\ - check( z, z0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3ris); - -// tadd3jris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ 
-UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto z0 = convert( conj( convert_prec( x ) ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y: " << y ); \ -\ - ctypez z; \ - bli_tadd3jris( chx,chy,chz,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ), \ - real( z ), imag( z ) ); \ -\ - INFO( "z (C++): " << z0 ); \ - INFO( "z (BLIS): " << z ); \ -\ - check( z, z0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3jris); - -/****************************************************************************** - * - * adds - * - *****************************************************************************/ - -// tadds unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( x ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (orig): " << y ); \ -\ - bli_tadds( chx,chy,chc, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX(RC, RC, C, adds); - -// taddjs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( conj( convert_prec( x ) ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (orig): " << y ); \ -\ - bli_taddjs( chx,chy,chc, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, addjs ) - -// taddris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 
= convert( convert_prec( x ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (orig): " << y ); \ -\ - bli_taddris( chx,chy,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, addris ) - -// taddjris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( conj( convert_prec( x ) ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (orig): " << y ); \ -\ - bli_taddjris( chx,chy,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, addjris ) - -// tadds_mxn unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ -\ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ -\ - INFO( "row-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( 1.0, xmn, 1.0, ymn0, dense ); \ -\ - INFO( "x:\n" << xmn ); \ - INFO( "y (init):\n" << ymn ); \ -\ - bli_tadds_mxn( chx,chy,chc, M, N, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -\ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ -\ - INFO( "column-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( 1.0, xmn, 1.0, ymn0, dense ); \ -\ - INFO( "x:\n" << xmn ); \ - INFO( "y (init):\n" << ymn ); \ -\ - bli_tadds_mxn( chx,chy,chc, N, M, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ -\ - INFO( 
"y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, adds_mxn ) - -/****************************************************************************** - * - * axpbys - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chb,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) + \ - convert_prec( b ) * \ - convert_prec( y ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ - INFO( "b: " << b ); \ - INFO( "y (init): " << y ); \ -\ - bli_taxpbys( cha,chx,chb,chy,chc, a, x, b, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, C, axpbys ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chb,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - conj( convert_prec( x ) ) + \ - convert_prec( b ) * \ - convert_prec( y ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ - INFO( "b: " << b ); \ - INFO( "y (init): " << y ); \ -\ - bli_taxpbyjs( cha,chx,chb,chy,chc, a, x, b, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, C, axpbyjs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chb,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - 
for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) + \ - convert_prec( b ) * \ - convert_prec( y ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ - INFO( "b: " << b ); \ - INFO( "y (init): " << y ); \ -\ - bli_taxpbyris( cha,chx,chb,chy,chc, \ - real( a ), imag( a ), \ - real( x ), imag( x ), \ - real( b ), imag( b ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, C, axpbyris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chb,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - conj( convert_prec( x ) ) + \ - convert_prec( b ) * \ - convert_prec( y ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ - INFO( "b: " << b ); \ - INFO( "y (init): " << y ); \ -\ - bli_taxpbyjris( cha,chx,chb,chy,chc, \ - real( a ), imag( a ), \ - real( x ), imag( x ), \ - real( b ), imag( b ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, C, axpbyjris ) - -/****************************************************************************** - * - * axpys - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) + \ - convert_prec( y ) ); \ 
-\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_taxpys( cha,chx,chy,chc, a, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, axpys ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - conj( convert_prec( x ) ) + \ - convert_prec( y ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_taxpyjs( cha,chx,chy,chc, a, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, axpyjs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) + \ - convert_prec( y ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_taxpyris( cha,chx,chy,chc, \ - real( a ), imag( a ), \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, axpyris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - conj( convert_prec( x ) ) + \ - convert_prec( y ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); 
\ - INFO( "y (init): " << y ); \ -\ - bli_taxpyjris( cha,chx,chy,chc, \ - real( a ), imag( a ), \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, axpyjris ) - -/****************************************************************************** - * - * conjs - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - for ( auto y : test_values() ) \ - { \ - auto y0 = conj( y ); \ -\ - INFO( "y (init): " << y ); \ -\ - bli_tconjs( chy, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( C, conjs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - for ( auto y : test_values() ) \ - { \ - auto y0 = conj( y ); \ -\ - INFO( "y (init): " << y ); \ -\ - bli_tconjris( chy, real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( C, conjris ) - -/****************************************************************************** - * - * copycjs - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( bli_is_conj( conjx ) ? 
conj( x ) : x ); \ -\ - INFO( "conjx: " << bli_is_conj( conjx ) ); \ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tcopycjs( chx,chy, conjx, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copycjs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( bli_is_conj( conjx ) ? conj( x ) : x ); \ -\ - INFO( "conjx: " << bli_is_conj( conjx ) ); \ - INFO( "x: " << x); \ -\ - ctypey y; \ - bli_tcopycjris( chx,chy, conjx, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copycjris ) - -/****************************************************************************** - * - * copynzs - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = y; \ - real( y0 ) = real( x ); \ - if ( is_complex::value ) \ - imag( y0 ) = imag( x ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (orig): " << y ); \ -\ - bli_tcopynzs( chx,chy, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copynzs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = y; \ - real( y0 ) = real( x ); \ - if ( is_complex::value ) \ - imag( y0 ) = -imag( x ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (orig): " << y ); \ -\ - bli_tcopynzjs( chx,chy, x, y ); \ -\ - INFO( "y (C++): 
" << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copynzjs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = y; \ - real( y0 ) = real( x ); \ - if ( is_complex::value ) \ - imag( y0 ) = imag( x ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (orig): " << y ); \ -\ - bli_tcopynzris( chx,chy, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copynzris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = y; \ - real( y0 ) = real( x ); \ - if ( is_complex::value ) \ - imag( y0 ) = -imag( x ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (orig): " << y ); \ -\ - bli_tcopynzjris( chx,chy, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copynzjris ) - -/****************************************************************************** - * - * copys - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( x ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tcopys( chx,chy, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copys ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 
= convert( conj( x ) ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tcopyjs( chx,chy, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copyjs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( x ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tcopyris( chx,chy, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copyris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( conj( x ) ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tcopyjris( chx,chy, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copyjris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ -\ - for ( auto x : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ -\ - INFO( "row-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( 1.0, xmn, 0.0, ymn0, dense ); \ -\ - INFO( "x:\n" << xmn ); \ -\ - bli_tcopys_mxn( chx,chy, M, N, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -\ - for ( auto x : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ -\ - INFO( "column-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( 1.0, xmn, 0.0, ymn0, dense ); \ -\ - INFO( "x:\n" << xmn ); \ -\ - bli_tcopys_mxn( chx,chy, N, M, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ 
-\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, copys_mxn ) - -/****************************************************************************** - * - * dots - * - *****************************************************************************/ - -// No tests, dot(x, y, a) == axpy(y, x, a) - -/****************************************************************************** - * - * eqs - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto expected = convert_prec( x ) == \ - convert_prec( y ); \ -\ - INFO( "x: " << x ); \ - INFO( "y: " << y ); \ -\ - auto found = bli_teqs( chx,chy,chc, x, y ); \ -\ - INFO( "expected: " << expected ); \ - INFO( "found : " << found ); \ -\ - REQUIRE( expected == found ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, RC, eqs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto expected = convert_prec( x ) == \ - convert_prec( y ); \ -\ - INFO( "x: " << x ); \ - INFO( "y: " << y ); \ -\ - auto found = bli_teqris( chx,chy,chc, \ - real(x), imag(x), \ - real(y), imag(y) ); \ -\ - INFO( "expected: " << expected ); \ - INFO( "found : " << found ); \ -\ - REQUIRE( expected == found ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, RC, eqris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx ) \ -UNIT_TEST(chx,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto expected = x == convert_prec( 1.0 ); \ -\ - INFO( "x: " << x ); \ -\ - auto found = bli_teq1ris( chx, real( x ), imag( x ) ); \ -\ - INFO( "expected: " << expected ); \ - INFO( "found : " << found 
); \ -\ - REQUIRE( expected == found ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, eq1ris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx ) \ -UNIT_TEST(chx,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto expected = x == convert_prec( 0.0 ); \ -\ - INFO( "x: " << x ); \ -\ - auto found = bli_teq0ris( chx, real( x ), imag( x ) ); \ -\ - INFO( "expected: " << expected ); \ - INFO( "found : " << found ); \ -\ - REQUIRE( expected == found ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, eq0ris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx ) \ -UNIT_TEST(chx,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto expected = x == convert_prec( -1.0 ); \ -\ - INFO( "x: " << x ); \ -\ - auto found = bli_teqm1ris( chx, real( x ), imag( x ) ); \ -\ - INFO( "expected: " << expected ); \ - INFO( "found : " << found ); \ -\ - REQUIRE( expected == found ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, eqm1ris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx ) \ -UNIT_TEST(chx,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto expected = x == convert_prec( 1.0 ); \ -\ - INFO( "x: " << x ); \ -\ - auto found = bli_teq1s( chx, x ); \ -\ - INFO( "expected: " << expected ); \ - INFO( "found : " << found ); \ -\ - REQUIRE( expected == found ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, eq1s ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx ) \ -UNIT_TEST(chx,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto expected = x == convert_prec( 0.0 ); \ -\ - INFO( "x: " << x ); \ -\ - auto found = bli_teq0s( chx, x ); \ -\ - INFO( "expected: " << expected ); \ - INFO( "found : " << found ); \ -\ - REQUIRE( expected == found ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, eq0s ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx ) \ -UNIT_TEST(chx,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto expected = x == convert_prec( -1.0 ); \ -\ - INFO( "x: " << x ); \ -\ - auto found = bli_teqm1s( chx, x ); \ 
-\ - INFO( "expected: " << expected ); \ - INFO( "found : " << found ); \ -\ - REQUIRE( expected == found ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, eqm1s ) - -/****************************************************************************** - * - * fprints - * - *****************************************************************************/ - -// No tests - -/****************************************************************************** - * - * gets - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ -\ - using ctypeyr = make_real_t; \ - using ctypeyc = make_complex_t; \ -\ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( x ); \ -\ - INFO( "x: " << x ); \ -\ - ctypeyr yr, yi; \ - bli_tgets( chx,chy, x, yr, yi ); \ -\ - INFO( "yr (C++): " << real( y0 ) ); \ - INFO( "yi (C++): " << imag( y0 ) ); \ - INFO( "yr (BLIS): " << yr ); \ - INFO( "yi (BLIS): " << yi ); \ -\ - check( yr, real( y0 ) ); \ - check( yi, imag( y0 ) ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, RC, gets ) - -/****************************************************************************** - * - * inverts - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(chx,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( 1.0 ) / \ - convert_prec( x ) ); \ -\ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tinverts( chx,chc, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, inverts ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(chx,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( 1.0 ) / \ - convert_prec( x ) ); \ -\ - INFO( "x: " << 
x ); \ -\ - ctypex y = x; \ - bli_tinvertris( chx,chc, real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, invertris ) - -/****************************************************************************** - * - * invscals - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( x ) / \ - convert_prec( a ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tinvscals( cha,chx,chc, a, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, invscals ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( x ) / \ - convert_prec( conj( a ) ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tinvscaljs( cha,chx,chc, a, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, invscaljs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( x ) / \ - convert_prec( a ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tinvscalris( cha,chx,chc, \ - real( a ), imag( a ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - 
-INSERT_GENTFUNC_MIX3( RC, RC, C, invscalris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( x ) / \ - convert_prec( conj( a ) ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tinvscaljris( cha,chx,chc, \ - real( a ), imag( a ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, invscaljris ) - -/****************************************************************************** - * - * neg2s - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( -x ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tneg2s( chx,chy, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, neg2s ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( -x ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tneg2ris( chx,chy, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, C, neg2ris ) - -/****************************************************************************** - * - * randnp2s - * - *****************************************************************************/ - -// No tests - -/****************************************************************************** - * - * rands - * - 
*****************************************************************************/ - -// No tests - -/****************************************************************************** - * - * scal2s - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tscal2s( cha,chx,chy,chc, a, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2s ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( conj( x ) ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tscal2js( cha,chx,chy,chc, a, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2js ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tscal2ris( cha,chx,chy,chc, \ - real( a ), imag( a ), \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2ris ) - 
-#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( conj( x ) ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tscal2jris( cha,chx,chy,chc, \ - real( a ), imag( a ), \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2jris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ -\ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ -\ - INFO( "row-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( a, bli_is_conj( conjx ) ? conj( xmn ) : xmn, 0.0, ymn0, dense ); \ -\ - INFO( "conjx: " << bli_is_conj( conjx ) ); \ - INFO( "a: " << a ); \ - INFO( "x:\n" << xmn ); \ -\ - bli_tscal2s_mxn( cha,chx,chy,chc, conjx, M, N, &a, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -\ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ -\ - INFO("column-major"); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( a, bli_is_conj( conjx ) ? 
conj( xmn ) : xmn, 0.0, ymn0, dense ); \ -\ - INFO( "conjx: " << bli_is_conj( conjx ) ); \ - INFO( "a: " << a ); \ - INFO( "x:\n" << xmn ); \ -\ - bli_tscal2s_mxn( cha,chx,chy,chc, conjx, N, M, &a, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, scal2s_mxn ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ -\ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ -\ - INFO( "row-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( a, bli_is_conj( conjx ) \ - ? conj( xmn ) \ - : xmn, \ - 0.0, ymn0, dense ); \ -\ - INFO( "conjx: " << bli_is_conj( conjx ) ); \ - INFO( "a: " << a ); \ - INFO( "x:\n" << xmn ); \ -\ - bli_tscal2ris_mxn( cha,chx,chy,chc, conjx, \ - M, N, &a, \ - &xmn[0][0], N, 1, \ - &ymn[0][0], 2*N, 2, 1 ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -\ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ -\ - INFO( "column-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( a, bli_is_conj( conjx ) \ - ? 
conj( xmn ) \ - : xmn, \ - 0.0, ymn0, dense ); \ -\ - INFO( "conjx: " << bli_is_conj( conjx ) ); \ - INFO( "a: " << a ); \ - INFO( "x:\n" << xmn ); \ -\ - bli_tscal2ris_mxn( cha,chx,chy,chc, \ - conjx, N, M, &a, \ - &xmn[0][0], 1, N, \ - &ymn[0][0], 2, 2*N, 1 ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, C, C, C, scal2ris_mxn_1 ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ -\ - using ctypeyr = make_real_t; \ -\ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto yrmn = tile(); \ - auto yimn = tile(); \ -\ - INFO( "row-major" ); \ -\ - auto ymn0 = tile(); \ - axpbys_mxn( a, bli_is_conj( conjx ) \ - ? conj( xmn ) \ - : xmn, \ - 0.0, ymn0, dense ); \ - auto yrmn0 = real( ymn0 ); \ - auto yimn0 = imag( ymn0 ); \ -\ - INFO( "conjx: " << bli_is_conj( conjx ) ); \ - INFO( "a: " << a ); \ - INFO( "x:\n" << xmn ); \ -\ - bli_tscal2ris_mxn( cha,chx,chy,chc, \ - conjx, M, N, &a, \ - &xmn[0][0], N, 1, \ - &yrmn[0][0], N, 1, \ - &yimn[0][0] - &yrmn[0][0] ); \ -\ - INFO( "yr (C++):\n" << yrmn0 ); \ - INFO( "yi (C++):\n" << yimn0 ); \ - INFO( "yr (BLIS):\n" << yrmn ); \ - INFO( "yi (BLIS):\n" << yimn ); \ -\ - check( yrmn, yrmn0 ); \ - check( yimn, yimn0 ); \ - } \ -\ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto yrmn = tile(); \ - auto yimn = tile(); \ -\ - INFO( "column-major" ); \ -\ - auto ymn0 = tile(); \ - axpbys_mxn( a, bli_is_conj( conjx ) \ - ? 
conj( xmn ) \ - : xmn, \ - 0.0, ymn0, dense ); \ - auto yrmn0 = real( ymn0 ); \ - auto yimn0 = imag( ymn0 ); \ -\ - INFO( "conjx: " << bli_is_conj( conjx ) ); \ - INFO( "a: " << a ); \ - INFO( "x:\n" << xmn ); \ -\ - bli_tscal2ris_mxn( cha,chx,chy,chc, \ - conjx, N, M, &a, \ - &xmn[0][0], 1, N, \ - &yrmn[0][0], 1, N, \ - &yimn[0][0] - &yrmn[0][0] ); \ -\ - INFO( "yr (C++):\n" << yrmn0 ); \ - INFO( "yi (C++):\n" << yimn0 ); \ - INFO( "yr (BLIS):\n" << yrmn ); \ - INFO( "yi (BLIS):\n" << yimn ); \ -\ - check( yrmn, yrmn0 ); \ - check( yimn, yimn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, C, C, C, scal2ris_mxn_k ) - -/****************************************************************************** - * - * scals - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tscals( cha,chx,chc, a, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, scals ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( conj( a ) ) * \ - convert_prec( x ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tscaljs( cha,chx,chc, a, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, scaljs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : 
test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tscalris( cha,chx,chc, \ - real( a ), imag( a ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, scalris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( convert_prec( conj( a ) ) * \ - convert_prec( x ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypex y = x; \ - bli_tscaljris( cha,chx,chc, \ - real( a ), imag( a ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, scaljris ) - -// xpbys_mxn_uplo unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ -\ - for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto diagoff : { -1, 0, 1 } ) \ - { \ - auto xmn = tile( x ); \ -\ - INFO( "row-major" ); \ -\ - std::function func = is_below( diagoff ); \ - if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ -\ - auto xmn0 = xmn; \ - axpbys_mxn( a, xmn, 0.0, xmn0, func ); \ -\ - INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ - INFO( "diagoff: " << diagoff ); \ - INFO( "a: " << a ); \ - INFO( "x (init):\n" << xmn ); \ -\ - bli_tscalris_mxn_uplo( cha,chx,chc, uplo, diagoff, M, N, \ - &real( a ), &real( a )+1, \ - &real( xmn[0][0] ), &real( xmn[0][0] )+1, \ - &real( xmn[1][0] ) - &real( xmn[0][0] ), \ - &real( xmn[0][1] ) - &real( xmn[0][0] ) ); \ -\ - 
INFO( "x (C++):\n" << xmn0 ); \ - INFO( "x (BLIS):\n" << xmn ); \ -\ - check( xmn, xmn0 ); \ - } \ -\ - for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto diagoff : { -1, 0, 1 } ) \ - { \ - auto xmn = tile( x ); \ -\ - INFO( "column-major" ); \ -\ - std::function func = is_below( diagoff ); \ - if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ -\ - auto xmn0 = xmn; \ - axpbys_mxn( a, xmn, 0.0, xmn0, func ); \ -\ - INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ - INFO( "diagoff: " << diagoff ); \ - INFO( "a: " << a ); \ - INFO( "x (init):\n" << xmn ); \ -\ - bli_tscalris_mxn_uplo( cha,chx,chc, uplo, diagoff, N, M, \ - &real( a ), &real( a )+1, \ - &real( xmn[0][0] ), &real( xmn[0][0] )+1, \ - &real( xmn[0][1] ) - &real( xmn[0][0] ), \ - &real( xmn[1][0] ) - &real( xmn[0][0] ) ); \ -\ - INFO( "x (C++):\n" << xmn0 ); \ - INFO( "x (BLIS):\n" << xmn ); \ -\ - check( xmn, xmn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, scalris_mxn_uplo ) - -/****************************************************************************** - * - * sets - * - *****************************************************************************/ - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( x ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tsets( chx,chy, real( x ), imag( x ), y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, RC, sets ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = y; \ - real( y0 ) = convert_prec( real( x ) ); \ -\ - INFO( "x: " << x ); \ -\ - bli_tsetrs( chx,chy, real( x ), y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y 
(BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, RC, setrs ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = y; \ - imag( y0 ) = convert_prec( imag( x ) ); \ -\ - INFO( "x: " << x ); \ -\ - bli_tsetis( chx,chy, imag( x ), y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, RC, setis ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( x ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tsetris( chx,chy, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, RC, setris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( 0.0 ); \ -\ - INFO( "y (init): " << y ); \ -\ - bli_tset0s( chy, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, set0s ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( 1.0 ); \ -\ - INFO( "y (init): " << y ); \ -\ - bli_tset1s( chy, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, set1s ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - for ( auto y : test_values() ) \ - { \ - auto y0 = y; \ - real( y0 ) = convert_prec( 0.0 ); \ -\ - INFO( "y (init): " << y ); \ -\ - bli_tsetr0s( chy, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - 
INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, setr0s ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - for ( auto y : test_values() ) \ - { \ - auto y0 = y; \ - imag( y0 ) = convert_prec( 0.0 ); \ -\ - INFO( "y (init): " << y ); \ -\ - bli_tseti0s( chy, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, seti0s ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( 0.0 ); \ -\ - bli_tset0ris( chy, real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, set0ris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ - \ - for ( auto y : test_values() ) \ - { \ - auto ymn = tile( y ); \ -\ - INFO( "row-major" ); \ -\ - auto ymn0 = tile( convert( 0.0 ) ); \ -\ - INFO( "y (init):\n" << ymn); \ -\ - bli_tset0s_mxn( chy, M, N, &ymn[0][0], N, 1 ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ - \ - for ( auto y : test_values() ) \ - { \ - auto ymn = tile( y ); \ -\ - INFO( "column-major" ); \ -\ - auto ymn0 = tile( convert( 0.0 ) ); \ -\ - INFO( "y (init):\n" << ymn ); \ -\ - bli_tset0s_mxn( chy, N, M, &ymn[0][0], 1, N ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX1( RC, set0s_mxn ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - /* TODO */ \ -) - -//INSERT_GENTFUNC_MIX1( C, set0bbs_mxn ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypey, chy ) \ -UNIT_TEST(cha,chy,opname) \ -( \ - /* TODO */ \ -) - 
-//INSERT_GENTFUNC_MIX2( RC, C, set1ms_mxn_diag ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypey, chy ) \ -UNIT_TEST(cha,chy,opname) \ -( \ - /* TODO */ \ -) - -//INSERT_GENTFUNC_MIX2( RC, C, set1ms_mxn_uplo ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypey, chy ) \ -UNIT_TEST(cha,chy,opname) \ -( \ - /* TODO */ \ -) - -//INSERT_GENTFUNC_MIX2( RC, C, set1ms_mxn ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypey, chy ) \ -UNIT_TEST(chy,opname) \ -( \ - /* TODO */ \ -) - -//INSERT_GENTFUNC_MIX1( C, seti01ms_mxn_diag ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chy,chc,opname) \ -( \ - /* TODO */ \ -) - -//INSERT_GENTFUNC_MIX3( RC, RC, C, setrihs_mxn_diag ) - -/****************************************************************************** - * - * sqrt2s - * - *****************************************************************************/ - -// tsqrt2s unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( square_root( convert_prec( x ) ) ); \ -\ - ctypey y; \ - bli_tsqrt2s( chx,chy,chc, x, y ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( R, R, R, sqrt2s ) - -// tsqrt2ris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( square_root( convert_prec( x ) ) ); \ -\ - ctypey y; \ - bli_tsqrt2ris( chx,chy,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( R, R, R, sqrt2ris ) - 
-/****************************************************************************** - * - * subs - * - *****************************************************************************/ - -// tsubs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( y ) - \ - convert_prec( x ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_tsubs( chx,chy,chc, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, subs ) - -// tsubjs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( y ) - \ - conj( convert_prec( x ) ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_tsubjs( chx,chy,chc, x, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, subjs ) - -// tsubris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( y ) - \ - convert_prec( x ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_tsubris( chx,chy,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, subris ) - -// tsubjris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chy,chc,opname) \ -( \ - for ( auto x : 
test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( y ) - \ - conj( convert_prec( x ) ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_tsubjris( chx,chy,chc, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, RC, C, subjris ) - -/****************************************************************************** - * - * swaps - * - *****************************************************************************/ - -// tswaps unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto x0 = convert( y ); \ - auto y0 = convert( x ); \ -\ - INFO( "x (init): " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_tswaps( chx,chy, x, y ); \ -\ - INFO( "x (C++): " << x0 ); \ - INFO( "y (C++): " << y0 ); \ - INFO( "x (BLIS): " << x ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( x, x0 ); \ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, RC, swaps ) - -// tswapris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto x0 = convert( y ); \ - auto y0 = convert( x ); \ -\ - INFO( "x (init): " << x ); \ - INFO( "y (init): " << y ); \ -\ - bli_tswapris( chx,chy, \ - real( x ), imag( x ), \ - real( y ), imag( y ) ); \ -\ - INFO( "x (C++): " << x0 ); \ - INFO( "y (C++): " << y0 ); \ - INFO( "x (BLIS): " << x ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( x, x0 ); \ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( RC, RC, swapris ) - -/****************************************************************************** - * - * xpbys - * - 
*****************************************************************************/ - -// txpbys unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chb,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( x ) + \ - convert_prec( b ) * \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "b: " << b ); \ - INFO( "y (init): " << y ); \ -\ - bli_txpbys( chx,chb,chy,chc, x, b, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbys ) - -// txpbyjs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chb,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( conj( convert_prec( x ) ) + \ - convert_prec( b ) * \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "b: " << b ); \ - INFO( "y (init): " << y ); \ -\ - bli_txpbyjs( chx,chb,chy,chc, x, b, y ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbyjs ) - -// txpbyris unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chb,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( convert_prec( x ) + \ - convert_prec( b ) * \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "b: " << b ); \ - INFO( "y (init): " << y ); \ -\ - bli_txpbyris( chx,chb,chy,chc, \ - real( x ), imag( x ), \ - real( b ), imag( b ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << 
y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbyris ) - -// txpbyjris -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chb,chy,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto y0 = convert( conj( convert_prec( x ) ) + \ - convert_prec( b ) * \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "b: " << b ); \ - INFO( "y (init): " << y ); \ -\ - bli_txpbyjris( chx,chb,chy,chc, \ - real( x ), imag( x ), \ - real( b ), imag( b ), \ - real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbyjris ) - -// xpbys_mxn unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chb,chy,chc,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ -\ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ -\ - INFO( "row-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( 1.0, xmn, b, ymn0, dense ); \ -\ - INFO( "x:\n" << xmn ); \ - INFO( "b: " << b ); \ - INFO( "y (init):\n" << ymn ); \ -\ - bli_txpbys_mxn( chx,chb,chy,chc, M, N, &xmn[0][0], N, 1, &b, &ymn[0][0], N, 1 ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -\ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ -\ - INFO( "column-major" ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( 1.0, xmn, b, ymn0, dense ); \ -\ - INFO( "x:\n" << xmn ); \ - INFO( "b: " << b ); \ - INFO( "y (init):\n" << ymn ); \ -\ - bli_txpbys_mxn( chx,chb,chy,chc, N, M, &xmn[0][0], 1, N, &b, &ymn[0][0], 
1, N ); \ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbys_mxn ) - -// xpbys_mxn_uplo unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(chx,chb,chy,chc,opname) \ -( \ - constexpr auto M = 4; \ - constexpr auto N = 4; \ -\ - for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - for ( auto diagoff : { -1, 0, 1 } ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ -\ - INFO( "row-major" ); \ -\ - std::function func = is_below( diagoff ); \ - if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( 1.0, xmn, b, ymn0, func ); \ -\ - INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ - INFO( "diagoff: " << diagoff ); \ - INFO( "x:\n" << xmn ); \ - INFO( "b: " << b ); \ - INFO( "y (init):\n" << ymn ); \ -\ - bli_txpbys_mxn_uplo( chx,chb,chy,chc, diagoff, uplo, M, N, &xmn[0][0], N, 1, &b, &ymn[0][0], N, 1 ); \ -\ - INFO( "y (C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -\ - for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - for ( auto diagoff : { -1, 0, 1 } ) \ - { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ -\ - INFO( "column-major" ); \ -\ - std::function func = is_below( diagoff ); \ - if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ -\ - auto ymn0 = ymn; \ - axpbys_mxn( 1.0, xmn, b, ymn0, func ); \ -\ - INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ - INFO( "diagoff: " << diagoff ); \ - INFO( "x:\n" << xmn ); \ - INFO( "b: " << b ); \ - INFO( "y (init):\n" << ymn ); \ -\ - bli_txpbys_mxn_uplo( chx,chb,chy,chc, diagoff, uplo, N, M, &xmn[0][0], 1, N, &b, &ymn[0][0], 1, N ); \ -\ - INFO( "y 
(C++):\n" << ymn0 ); \ - INFO( "y (BLIS):\n" << ymn ); \ -\ - check( ymn, ymn0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, RC, RC, C, xpbys_mxn_uplo ) - -/****************************************************************************** - * - * copy1es - * - *****************************************************************************/ - -// tcopy1es unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto yri0 = convert( x ); \ - auto yir0 = convert( swapri( conj( x ) ) ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey yri, yir; \ - bli_tcopy1es( chx,chy, x, yri, yir ); \ -\ - INFO( "yri (C++): " << yri0 ); \ - INFO( "yir (C++): " << yir0 ); \ - INFO( "yri (BLIS): " << yri ); \ - INFO( "yir (BLIS): " << yir ); \ -\ - check( yri, yri0 ); \ - check( yir, yir0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( C, C, copy1es ) - -// tcopyj1es unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto yri0 = convert( conj( x ) ); \ - auto yir0 = convert( swapri( x ) ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey yri, yir; \ - bli_tcopyj1es( chx,chy, x, yri, yir ); \ -\ - INFO( "yri (C++): " << yri0 ); \ - INFO( "yir (C++): " << yir0 ); \ - INFO( "yri (BLIS): " << yri ); \ - INFO( "yir (BLIS): " << yir ); \ -\ - check( yri, yri0 ); \ - check( yir, yir0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( C, C, copyj1es ) - -/****************************************************************************** - * - * invert1es - * - *****************************************************************************/ - -// tinvert1es unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(chx,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto xri = x; \ - auto xir = swapri( conj( x ) ); \ -\ - auto xri0 = convert( convert_prec( 1.0 ) / \ - convert_prec( x ) 
); \ - auto xir0 = swapri( conj( xri0 ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "xri (orig): " << xri ); \ - INFO( "xir (orig): " << xir ); \ -\ - bli_tinvert1es( chx,chc, xri, xir ); \ -\ - INFO( "xri (C++): " << xri0 ); \ - INFO( "xir (C++): " << xir0 ); \ - INFO( "xri (BLIS): " << xri ); \ - INFO( "xir (BLIS): " << xir ); \ -\ - check( xri, xri0 ); \ - check( xir, xir0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( C, C, invert1es ) - -/****************************************************************************** - * - * scal21es - * - *****************************************************************************/ - -// tscal21es unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto yri0 = convert( convert_prec( a ) * \ - convert_prec( x ) ); \ - auto yir0 = swapri( conj( yri0 ) ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypey yri, yir; \ - bli_tscal21es( cha,chx,chy,chc, a, x, yri, yir ); \ -\ - INFO( "yri (C++): " << yri0 ); \ - INFO( "yir (C++): " << yir0 ); \ - INFO( "yri (BLIS): " << yri ); \ - INFO( "yir (BLIS): " << yir ); \ -\ - check( yri, yri0 ); \ - check( yir, yir0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, C, C, C, scal21es ) - -// tscal2j1es unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto yri0 = convert( convert_prec( a ) * \ - convert_prec( conj( x ) ) ); \ - auto yir0 = swapri( conj( yri0 ) ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypey yri, yir; \ - bli_tscal2j1es( cha,chx,chy,chc, a, x, yri, yir ); \ -\ - INFO( "yri (C++): " << yri0 ); \ - INFO( "yir (C++): " << yir0 ); \ - INFO( "yri (BLIS): " << yri ); \ - INFO( "yir (BLIS): " << yir ); \ -\ - 
check( yri, yri0 ); \ - check( yir, yir0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, C, C, C, scal2j1es ) - -/****************************************************************************** - * - * scal1es - * - *****************************************************************************/ - -// tscal1es unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xri = x; \ - auto xir = swapri( conj( x ) ); \ -\ - auto xri0 = convert( convert_prec( a ) * \ - convert_prec( x ) ); \ - auto xir0 = swapri( conj( xri0 ) ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ - INFO( "xri (orig): " << xri ); \ - INFO( "xir (orig): " << xir ); \ -\ - bli_tscal1es( cha,chx,chc, a, xri, xir ); \ -\ - INFO( "xri (C++): " << xri0 ); \ - INFO( "xir (C++): " << xir0 ); \ - INFO( "xri (BLIS): " << xri ); \ - INFO( "xir (BLIS): " << xir ); \ -\ - check( xri, xri0 ); \ - check( xir, xir0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, C, C, scal1es ) - -/****************************************************************************** - * - * copy1rs - * - *****************************************************************************/ - -// tcopy1rs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ -UNIT_TEST(chx,chy,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto y0 = convert( x ); \ -\ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tcopy1rs( chx,chy, x, real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( y, y0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( C, C, copy1rs ) - -/****************************************************************************** - * - * invert1rs - * - *****************************************************************************/ - -// tinvert1rs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypec, 
chc ) \ -UNIT_TEST(chx,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - { \ - auto x0 = convert( convert_prec( 1.0 ) / \ - convert_prec( x ) ); \ -\ - INFO( "x: " << x ); \ -\ - bli_tinvert1rs( chx,chc, real( x ), imag( x ) ); \ -\ - INFO( "x (C++): " << x0 ); \ - INFO( "x (BLIS): " << x ); \ -\ - check( x, x0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX2( C, C, invert1rs ) - -/****************************************************************************** - * - * scal21rs - * - *****************************************************************************/ - -// tscal21rs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xr = real( x ); \ - auto xi = imag( x ); \ -\ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( x ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tscal21rs( cha,chx,chy,chc, a, x, real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( yri, yri0 ); \ - check( yir, yir0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, C, C, C, scal21rs ) - -// tscal2j1rs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ -UNIT_TEST(cha,chx,chy,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto xr = real( x ); \ - auto xi = imag( x ); \ -\ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( conj( x ) ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x: " << x ); \ -\ - ctypey y; \ - bli_tscal2j1rs( cha,chx,chy,chc, a, x, real( y ), imag( y ) ); \ -\ - INFO( "y (C++): " << y0 ); \ - INFO( "y (BLIS): " << y ); \ -\ - check( yri, yri0 ); \ - check( yir, yir0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX4( RC, C, C, C, scal2j1rs ) - -/****************************************************************************** 
- * - * scal1rs - * - *****************************************************************************/ - -// tscal1rs unit test -#undef GENTFUNC -#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ -UNIT_TEST(cha,chx,chc,opname) \ -( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - { \ - auto x0 = convert( convert_prec( a ) * \ - convert_prec( x ) ); \ -\ - INFO( "a: " << a ); \ - INFO( "x (orig): " << x ); \ -\ - bli_tscal1rs( cha,chx,chc, a, real( x ), imag( x ) ); \ -\ - INFO( "x (C++): " << x0 ); \ - INFO( "xr(BLIS): " << x ); \ -\ - check( x, x0 ); \ - } \ -) - -INSERT_GENTFUNC_MIX3( RC, C, C, scal1rs ) diff --git a/test/level0/bli_unit_testing.h b/test/level0/test_l0.hpp similarity index 91% rename from test/level0/bli_unit_testing.h rename to test/level0/test_l0.hpp index 58cd34dc3e..0d93237538 100644 --- a/test/level0/bli_unit_testing.h +++ b/test/level0/test_l0.hpp @@ -43,10 +43,11 @@ #include #include #include +#include #include "blis.h" -using unit_test_t = void(*)(); +using unit_test_t = std::function; struct variable_printer_base { @@ -109,7 +110,7 @@ struct unit_test_registrar { test(); } - catch (unit_test_failure) + catch (const unit_test_failure&) { failed++; } @@ -135,6 +136,7 @@ struct unit_test_registrar vars.pop_back(); } + [[noreturn]] void fail(const char* cond) { printf("%sFAILURE%s\n\n", red().c_str(), normal().c_str()); @@ -178,15 +180,15 @@ struct variable_printer : variable_printer_base get_unit_test_registrar().pop_var(this); } - variable_printer& operator<<(const char* message) + variable_printer& operator<<(const char* m) { - this->message = message; + message = m; return *this; } - variable_printer& operator<<(const T& var) + variable_printer& operator<<(const T& v) { - this->var = var; + var = v; return *this; } @@ -206,14 +208,14 @@ struct variable_printer : variable_printer_base get_unit_test_registrar().push_var(this); } - virtual ~variable_printer() + virtual ~variable_printer() override { 
get_unit_test_registrar().pop_var(this); } - variable_printer& operator<<(const char* message) + variable_printer& operator<<(const char* m) { - this->message = message; + message = m; return *this; } @@ -243,27 +245,27 @@ struct variable_printer_helper VARIABLE_PRINTER(__VA_ARGS__) VAR_NAME(id); \ VAR_NAME(id) << __VA_ARGS__; -#if ENABLE_INFO +#ifdef ENABLE_INFO #define INFO(...) INFO_(__COUNTER__, __VA_ARGS__) #else #define INFO(...) #endif -#define TEST_NAME_(line) unit_test_##line -#define TEST_NAME(line) TEST_NAME_(line) +#define TEST_NAME_(line,name) unit_test_##name##_##line +#define TEST_NAME(line,name) TEST_NAME_(line,name) -#define TEST_ID_(line) unit_test_id_##line -#define TEST_ID(line) TEST_ID_(line) +#define TEST_ID_(line,name) unit_test_id_##name##_##line +#define TEST_ID(line,name) TEST_ID_(line,name) #define TEST_CASE_(id,name) \ -extern "C" void TEST_NAME(id)(); \ -static auto TEST_ID(id) = register_unit_test(TEST_NAME(id)); \ -void TEST_NAME(id)() -#define TEST_CASE(name) TEST_CASE_(__COUNTER__,name) +extern "C" void TEST_NAME(id,name)(); \ +static auto TEST_ID(id,name) = register_unit_test(TEST_NAME(id,name)); \ +void TEST_NAME(id,name)() +#define TEST_CASE(name) TEST_CASE_(__LINE__,name) #define REQUIRE(cond) \ do { \ - if ( !BLIS_LIKELY( cond ) ) \ + if ( !__builtin_expect( !!(cond), 1 ) ) \ { \ get_unit_test_registrar().fail( #cond ); \ } \ @@ -302,20 +304,20 @@ class Approx }; #define UNIT_TEST1( ch1, opname ) \ -TEST_CASE(STRINGIFY_INT(ch1##opname)) \ +TEST_CASE(ch1##opname) \ { \ INFO("Type character 1: " << #ch1); \ printf("Testing: %s...", STRINGIFY_INT(ch1##opname)); #define UNIT_TEST2( ch1, ch2, opname ) \ -TEST_CASE(STRINGIFY_INT(ch1##ch2##opname)) \ +TEST_CASE(ch1##ch2##opname) \ { \ INFO("Type character 1: " << #ch1); \ INFO("Type character 2: " << #ch2); \ printf("Testing: %s...", STRINGIFY_INT(ch1##ch2##opname)); #define UNIT_TEST3( ch1, ch2, ch3, opname ) \ -TEST_CASE(STRINGIFY_INT(ch1##ch2##ch3##opname)) \ 
+TEST_CASE(ch1##ch2##ch3##opname) \ { \ INFO("Type character 1: " << #ch1); \ INFO("Type character 2: " << #ch2); \ @@ -323,7 +325,7 @@ TEST_CASE(STRINGIFY_INT(ch1##ch2##ch3##opname)) \ printf("Testing: %s...", STRINGIFY_INT(ch1##ch2##ch3##opname)); #define UNIT_TEST4( ch1, ch2, ch3, ch4, opname ) \ -TEST_CASE(STRINGIFY_INT(ch1##ch2##ch3##ch4##opname)) \ +TEST_CASE(ch1##ch2##ch3##ch4##opname) \ { \ INFO("Type character 1: " << #ch1); \ INFO("Type character 2: " << #ch2); \ @@ -332,7 +334,7 @@ TEST_CASE(STRINGIFY_INT(ch1##ch2##ch3##ch4##opname)) \ printf("Testing: %s...", STRINGIFY_INT(ch1##ch2##ch3##ch4##opname)); #define UNIT_TEST5( ch1, ch2, ch3, ch4, ch5, opname ) \ -TEST_CASE(STRINGIFY_INT(ch1##ch2##ch3##ch4##ch5##opname)) \ +TEST_CASE(ch1##ch2##ch3##ch4##ch5##opname) \ { \ INFO("Type character 1: " << #ch1); \ INFO("Type character 2: " << #ch2); \ @@ -714,6 +716,40 @@ std::array,N>,M> imag(const std::array return ret; } +template +std::enable_if_t::value,std::array,M*D>> +bcast(const std::array,M>& x) +{ + std::array,D*M> ret; + for (size_t d = 0;d < D;d++) + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + ret[d + i*D][j] = x[i][j]; + return ret; +} + +template +std::enable_if_t::value,std::array,M*D>> +bcast(const std::array,M>& x) +{ + std::array,N>,2*D*M> ret_r; + std::array,D*M> ret; + for (size_t d = 0;d < D;d++) + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < N;j++) + { + ret_r[d + i*D + 0*D*M][j] = real(x[i][j]); + ret_r[d + i*D + 1*D*M][j] = imag(x[i][j]); + } + for (size_t i = 0;i < D*M;i++) + for (size_t j = 0;j < N;j++) + { + real(ret[i][j]) = ret_r[i*2+0][j]; + imag(ret[i][j]) = ret_r[i*2+1][j]; + } + return ret; +} + struct dense_cond { bool operator()(dim_t, dim_t) const { return true; } @@ -725,7 +761,7 @@ struct is_below { doff_t diagoff; - is_below(doff_t diagoff) : diagoff(diagoff) {} + is_below(doff_t d) : diagoff(d) {} bool operator()(dim_t i, dim_t j) const { return j-i <= diagoff; } }; @@ -734,7 +770,7 @@ struct 
is_above { doff_t diagoff; - is_above(doff_t diagoff) : diagoff(diagoff) {} + is_above(doff_t d) : diagoff(d) {} bool operator()(dim_t i, dim_t j) const { return j-i >= diagoff; } }; diff --git a/test/level0/test_tabsq2s.cxx b/test/level0/test_tabsq2s.cxx new file mode 100644 index 0000000000..d938978f99 --- /dev/null +++ b/test/level0/test_tabsq2s.cxx @@ -0,0 +1,86 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * absq2s + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( norm( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tabsq2s( chx,chy,chc, x, y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, absq2s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( norm( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tabsq2ris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, absq2ris ) diff --git a/test/level0/test_tabval2s.cxx b/test/level0/test_tabval2s.cxx new file mode 100644 index 0000000000..bcce27792c --- /dev/null +++ b/test/level0/test_tabval2s.cxx @@ -0,0 +1,109 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * abval2s + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( absolute( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tabval2s( chx,chy,chc, x, y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, abval2s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( absolute( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tabval2ris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, abval2ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3s( chx,chy,chz,chc, x, y, z ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) diff --git a/test/level0/test_tadd3s.cxx b/test/level0/test_tadd3s.cxx new file mode 100644 index 0000000000..a4a96962b8 --- /dev/null +++ b/test/level0/test_tadd3s.cxx @@ -0,0 +1,148 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * add3s + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3s( chx,chy,chz,chc, x, y, z ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3s); + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3js( chx,chy,chz,chc, x, y, z ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3js); + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3ris( chx,chy,chz,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ), \ + real( z ), imag( z ) ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3ris); + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, 
ctypez, chz, ctypec, chc ) \ +UNIT_TEST(chx,chy,chz,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto z0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + ctypez z; \ + bli_tadd3jris( chx,chy,chz,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ), \ + real( z ), imag( z ) ); \ +\ + INFO( "z (C++): " << z0 ); \ + INFO( "z (BLIS): " << z ); \ +\ + check( z, z0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3jris); diff --git a/test/level0/test_tadds.cxx b/test/level0/test_tadds.cxx new file mode 100644 index 0000000000..d48512be3b --- /dev/null +++ b/test/level0/test_tadds.cxx @@ -0,0 +1,196 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * adds + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tadds( chx,chy,chc, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3(RC, RC, C, adds); + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_taddjs( chx,chy,chc, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, addjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( 
convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_taddris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, addris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_taddjris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, addjris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, 1.0, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_tadds_mxn( chx,chy,chc, M, N, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, 1.0, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_tadds_mxn( chx,chy,chc, N, M, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); 
\ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, adds_mxn ) diff --git a/test/level0/test_taxpbys.cxx b/test/level0/test_taxpbys.cxx new file mode 100644 index 0000000000..57b90ea2cb --- /dev/null +++ b/test/level0/test_taxpbys.cxx @@ -0,0 +1,228 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * axpbys -- each test compares the BLIS macro result with a C++ reference equal to the sum of a*x and b*y; x is conjugated in the j-suffixed variants and the _mxn test takes its reference from the axpbys_mxn helper + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpbys( cha,chx,chb,chy,chc, a, x, b, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbys ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + conj( convert_prec( x ) ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpbyjs( cha,chx,chb,chy,chc, a, x, b, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbyjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ +
auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpbyris( cha,chx,chb,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( b ), imag( b ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbyris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + conj( convert_prec( x ) ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpbyjris( cha,chx,chb,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( b ), imag( b ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbyjris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, xmn, b, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_taxpbys_mxn( cha,chx,chb,chy,chc, M, N, &a, &xmn[0][0], N,
1, &b, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, xmn, b, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_taxpbys_mxn( cha,chx,chb,chy,chc, N, M, &a, &xmn[0][0], 1, N, &b, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbys_mxn ) diff --git a/test/level0/test_taxpys.cxx b/test/level0/test_taxpys.cxx new file mode 100644 index 0000000000..6e3b3d3886 --- /dev/null +++ b/test/level0/test_taxpys.cxx @@ -0,0 +1,156 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * axpys -- each test compares the BLIS macro result with a C++ reference equal to the sum of a*x and y; x is conjugated in the j-suffixed variants + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpys( cha,chx,chy,chc, a, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpys ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ +
INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpyjs( cha,chx,chy,chc, a, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpyjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpyris( cha,chx,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpyris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + conj( convert_prec( x ) ) + \ + convert_prec( y ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_taxpyjris( cha,chx,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpyjris ) diff --git a/test/level0/test_tconjs.cxx b/test/level0/test_tconjs.cxx new file mode 100644 index 0000000000..2216127f48 --- /dev/null +++ b/test/level0/test_tconjs.cxx @@ -0,0 +1,84 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries.
+ + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * conjs -- each test applies the BLIS conjugation macro to y and compares the result with conj( y ) + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = conj( y ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tconjs( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( C, conjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = conj( y ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tconjris( chy, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( C, conjris ) diff --git a/test/level0/test_tcopycjs.cxx b/test/level0/test_tcopycjs.cxx new file mode 100644 index 0000000000..bfc58edfd2 --- /dev/null +++ b/test/level0/test_tcopycjs.cxx @@ -0,0 +1,92 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution.
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * copycjs -- each test copies x into y for both values of conjx and compares the result with the converted x, conjugated when bli_is_conj( conjx ) holds + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( bli_is_conj( conjx ) ? conj( x ) : x ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopycjs( chx,chy, conjx, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copycjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( bli_is_conj( conjx ) ?
conj( x ) : x ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "x: " << x); \ +\ + ctypey y; \ + bli_tcopycjris( chx,chy, conjx, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copycjris ) diff --git a/test/level0/test_tcopynzs.cxx b/test/level0/test_tcopynzs.cxx new file mode 100644 index 0000000000..de0d84c951 --- /dev/null +++ b/test/level0/test_tcopynzs.cxx @@ -0,0 +1,150 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * copynzs -- each test starts the reference y0 from the original y, overwrites its real part with that of x and, only when the is_complex check holds, its imaginary part as well (negated in the j-suffixed variants) + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = convert_prec( real( x ) ); \ + if ( is_complex::value ) \ + imag( y0 ) = convert_prec( imag( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tcopynzs( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copynzs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = convert_prec( real( x ) ); \ + if ( is_complex::value ) \ + imag( y0 ) = convert_prec( -imag( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tcopyjnzs( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copyjnzs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \
+ for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = convert_prec( real( x ) ); \ + if ( is_complex::value ) \ + imag( y0 ) = convert_prec( imag( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tcopynzris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copynzris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = convert_prec( real( x ) ); \ + if ( is_complex::value ) \ + imag( y0 ) = convert_prec( -imag( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (orig): " << y ); \ +\ + bli_tcopyjnzris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copyjnzris ) diff --git a/test/level0/test_tcopys.cxx b/test/level0/test_tcopys.cxx new file mode 100644 index 0000000000..111c077739 --- /dev/null +++ b/test/level0/test_tcopys.cxx @@ -0,0 +1,280 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution.
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * copys -- each test copies x into y through a BLIS macro and compares the result with a converted C++ reference (conj( x ) in the j-suffixed variants); the 1e tests check both outputs yri and yir, and the _mxn test takes its reference from the axpbys_mxn helper + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopys( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copys ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( conj( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopyjs( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copyjs ) + +#undef
GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopyris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copyris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( conj( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopyjris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copyjris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto yri0 = convert( x ); \ + auto yir0 = convert( swapri( conj( x ) ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey yri, yir; \ + bli_tcopy1es( chx,chy, x, yri, yir ); \ +\ + INFO( "yri (C++): " << yri0 ); \ + INFO( "yir (C++): " << yir0 ); \ + INFO( "yri (BLIS): " << yri ); \ + INFO( "yir (BLIS): " << yir ); \ +\ + check( yri, yri0 ); \ + check( yir, yir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, R, copy1es ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto yri0 = convert( conj( x ) ); \ + auto yir0 = convert( swapri( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey yri, yir; \ + bli_tcopyj1es( chx,chy, x, yri, yir ); \ +\ + INFO( "yri (C++): " << yri0 ); \ + INFO( "yir (C++): " << yir0 ); \ + INFO( "yri (BLIS): " << yri ); \ + INFO( "yir (BLIS): " << yir ); \ +\ + check( yri, yri0 ); \ + check( yir, yir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2(
C, R, copyj1es ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tcopy1rs( chx,chy, x, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, R, copy1rs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( conj( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + /* exercise the conjugating 1r copy so the call matches the conj( x ) reference and the copyj1rs test name */ bli_tcopyj1rs( chx,chy, x, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, R, copyj1rs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, 0.0, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ +\ + bli_tcopys_mxn( chx,chy, M, N, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, 0.0, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ +\ + bli_tcopys_mxn( chx,chy, N, M, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, copys_mxn ) diff --git a/test/level0/test_tdots.cxx b/test/level0/test_tdots.cxx new file mode 100644 index 0000000000..d55605e1b2 --- /dev/null
+++ b/test/level0/test_tdots.cxx @@ -0,0 +1,44 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * dots + * + *****************************************************************************/ + +// No tests, dot(x, y, a) == axpy(y, x, a) diff --git a/test/level0/test_teqs.cxx b/test/level0/test_teqs.cxx new file mode 100644 index 0000000000..31b58364af --- /dev/null +++ b/test/level0/test_teqs.cxx @@ -0,0 +1,218 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * eqs -- each test compares the value returned by a BLIS equality macro with the result of the C++ == operator; the eq1, eq0 and eqm1 variants compare x with 1.0, 0.0 and -1.0 + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto expected = convert_prec( x ) == \ + convert_prec( y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + auto found = bli_teqs( chx,chy,chc, x, y ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, eqs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto expected = convert_prec( x ) == \ + convert_prec( y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + auto found = bli_teqris( chx,chy,chc, \ + real(x), imag(x), \ + real(y), imag(y) ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, eqris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x ==
convert_prec( 1.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teq1ris( chx, real( x ), imag( x ) ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eq1ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( 0.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teq0ris( chx, real( x ), imag( x ) ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eq0ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( -1.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teqm1ris( chx, real( x ), imag( x ) ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eqm1ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( 1.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teq1s( chx, x ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eq1s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( 0.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teq0s( chx, x ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eq0s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \
+UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto expected = x == convert_prec( -1.0 ); \ +\ + INFO( "x: " << x ); \ +\ + auto found = bli_teqm1s( chx, x ); \ +\ + INFO( "expected: " << expected ); \ + INFO( "found : " << found ); \ +\ + REQUIRE( expected == found ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, eqm1s ) diff --git a/test/level0/test_tfprints.cxx b/test/level0/test_tfprints.cxx new file mode 100644 index 0000000000..cc98f836a4 --- /dev/null +++ b/test/level0/test_tfprints.cxx @@ -0,0 +1,44 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * fprints + * + *****************************************************************************/ + +// No tests diff --git a/test/level0/test_tgets.cxx b/test/level0/test_tgets.cxx new file mode 100644 index 0000000000..09aa328e0b --- /dev/null +++ b/test/level0/test_tgets.cxx @@ -0,0 +1,71 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * gets + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ +\ + using ctypeyr = make_real_t; \ + using ctypeyc = make_complex_t; \ +\ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypeyr yr, yi; \ + bli_tgets( chx,chy, x, yr, yi ); \ +\ + INFO( "yr (C++): " << real( y0 ) ); \ + INFO( "yi (C++): " << imag( y0 ) ); \ + INFO( "yr (BLIS): " << yr ); \ + INFO( "yi (BLIS): " << yi ); \ +\ + check( yr, real( y0 ) ); \ + check( yi, imag( y0 ) ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, gets ) diff --git a/test/level0/test_tinverts.cxx b/test/level0/test_tinverts.cxx new file mode 100644 index 0000000000..66f72a20a8 --- /dev/null +++ b/test/level0/test_tinverts.cxx @@ -0,0 +1,141 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * inverts + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(chx,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( 1.0 ) / \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinverts( chx,chc, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, inverts ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(chx,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( 1.0 ) / \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvertris( chx,chc, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, invertris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(chx,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto xri = x; \ + auto xir = swapri( conj( x ) ); \ +\ + auto xri0 = convert( convert_prec( 1.0 ) / \ + convert_prec( x ) ); \ + auto xir0 = swapri( conj( xri0 ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "xri (orig): " << xri ); \ + INFO( "xir (orig): " << xir ); \ +\ + bli_tinvert1es( chx,chc, xri, xir ); \ +\ + INFO( "xri (C++): " << xri0 ); \ + INFO( "xir (C++): " << xir0 ); \ + INFO( "xri (BLIS): " << xri ); \ + INFO( "xir (BLIS): " << xir ); \ +\ + check( xri, xri0 ); \ + check( xir, xir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, R, invert1es ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(chx,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto x0 = convert( 
convert_prec( 1.0 ) / \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + bli_tinvert1rs( chx,chc, real( x ), imag( x ) ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( C, R, invert1rs ) diff --git a/test/level0/test_tinvscals.cxx b/test/level0/test_tinvscals.cxx new file mode 100644 index 0000000000..086b48a859 --- /dev/null +++ b/test/level0/test_tinvscals.cxx @@ -0,0 +1,146 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * invscals + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) / \ + convert_prec( a ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvscals( cha,chx,chc, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, invscals ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) / \ + convert_prec( conj( a ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvscaljs( cha,chx,chc, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, invscaljs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() 
) \ + { \ + auto y0 = convert( convert_prec( x ) / \ + convert_prec( a ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvscalris( cha,chx,chc, \ + real( a ), imag( a ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, invscalris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) / \ + convert_prec( conj( a ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tinvscaljris( cha,chx,chc, \ + real( a ), imag( a ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, invscaljris ) diff --git a/test/level0/test_tneg2s.cxx b/test/level0/test_tneg2s.cxx new file mode 100644 index 0000000000..c70f6270a9 --- /dev/null +++ b/test/level0/test_tneg2s.cxx @@ -0,0 +1,88 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * neg2s + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( -x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tneg2s( chx,chy, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, neg2s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( -x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tneg2ris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + 
+INSERT_GENTFUNC_MIX2( RC, R, neg2ris ) diff --git a/test/level0/test_trandnp2s.cxx b/test/level0/test_trandnp2s.cxx new file mode 100644 index 0000000000..238282ed18 --- /dev/null +++ b/test/level0/test_trandnp2s.cxx @@ -0,0 +1,44 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * randnp2s + * + *****************************************************************************/ + +// No tests diff --git a/test/level0/test_trands.cxx b/test/level0/test_trands.cxx new file mode 100644 index 0000000000..034d02e8b6 --- /dev/null +++ b/test/level0/test_trands.cxx @@ -0,0 +1,44 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * rands + * + *****************************************************************************/ + +// No tests diff --git a/test/level0/test_tscal2s.cxx b/test/level0/test_tscal2s.cxx new file mode 100644 index 0000000000..bcc0e105eb --- /dev/null +++ b/test/level0/test_tscal2s.cxx @@ -0,0 +1,511 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * scal2s + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2s( cha,chx,chy,chc, a, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( conj( x ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2js( cha,chx,chy,chc, a, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y 
(BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2js ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2ris( cha,chx,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( conj( x ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2jris( cha,chx,chy,chc, \ + real( a ), imag( a ), \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2jris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto yri0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ + auto yir0 = swapri( conj( yri0 ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey yri, yir; \ + bli_tscal21es( cha,chx,chy,chc, a, x, yri, yir ); \ +\ + INFO( "yri (C++): " << yri0 ); \ + INFO( "yir (C++): " << yir0 ); \ + INFO( "yri (BLIS): " << yri ); \ + INFO( "yir (BLIS): " << yir ); \ +\ + check( yri, yri0 ); \ + check( yir, yir0 ); 
\ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, R, scal21es ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto yri0 = convert( convert_prec( a ) * \ + convert_prec( conj( x ) ) ); \ + auto yir0 = swapri( conj( yri0 ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey yri, yir; \ + bli_tscal2j1es( cha,chx,chy,chc, a, x, yri, yir ); \ +\ + INFO( "yri (C++): " << yri0 ); \ + INFO( "yir (C++): " << yir0 ); \ + INFO( "yri (BLIS): " << yri ); \ + INFO( "yir (BLIS): " << yir ); \ +\ + check( yri, yri0 ); \ + check( yir, yir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, R, scal2j1es ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal21rs( cha,chx,chy,chc, a, x, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, R, scal21rs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( conj( x ) ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tscal2j1rs( cha,chx,chy,chc, a, x, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, R, scal2j1rs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, 
ctypey, chy, ctypec, chc ) \ +GENTFUNC0( opname, 1, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +GENTFUNC0( opname, 2, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +GENTFUNC0( opname, 5, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) + +#undef GENTFUNC0 +#define GENTFUNC0( opname, D, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,PASTECH(opname,_,D)) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn00 = tile(); \ + auto ymn = tile(); \ +\ + INFO("column-major"); \ +\ + axpbys_mxn( a, bli_is_conj( conjx ) ? conj( xmn ) : xmn, 0.0, ymn00, dense ); \ + auto ymn0 = bcast( ymn00 ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2bbs_mxn( cha,chx,chy,chc, conjx, N, M, &a, &xmn[0][0], 1, N, &ymn[0][0], D, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2bbs_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, bli_is_conj( conjx ) ? 
conj( xmn ) : xmn, 0.0, ymn0, dense ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2s_mxn( cha,chx,chy,chc, conjx, M, N, &a, &xmn[0][0], N, 1, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO("column-major"); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, bli_is_conj( conjx ) ? conj( xmn ) : xmn, 0.0, ymn0, dense ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2s_mxn( cha,chx,chy,chc, conjx, N, M, &a, &xmn[0][0], 1, N, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2s_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, bli_is_conj( conjx ) \ + ? 
conj( xmn ) \ + : xmn, \ + 0.0, ymn0, dense ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2ris_mxn( cha,chx,chy,chc, conjx, \ + M, N, &a, \ + &xmn[0][0], N, 1, \ + &ymn[0][0], 2*N, 2, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile(); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( a, bli_is_conj( conjx ) \ + ? conj( xmn ) \ + : xmn, \ + 0.0, ymn0, dense ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2ris_mxn( cha,chx,chy,chc, \ + conjx, N, M, &a, \ + &xmn[0][0], 1, N, \ + &ymn[0][0], 2, 2*N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, R, scal2ris_mxn_together ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(cha,chx,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + using ctypeyr = make_real_t; \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto yrmn = tile(); \ + auto yimn = tile(); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = tile(); \ + axpbys_mxn( a, bli_is_conj( conjx ) \ + ? 
conj( xmn ) \ + : xmn, \ + 0.0, ymn0, dense ); \ + auto yrmn0 = real( ymn0 ); \ + auto yimn0 = imag( ymn0 ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2ris_mxn( cha,chx,chy,chc, \ + conjx, M, N, &a, \ + &xmn[0][0], N, 1, \ + &yrmn[0][0], N, 1, \ + &yimn[0][0] - &yrmn[0][0] ); \ +\ + INFO( "yr (C++):\n" << yrmn0 ); \ + INFO( "yi (C++):\n" << yimn0 ); \ + INFO( "yr (BLIS):\n" << yrmn ); \ + INFO( "yi (BLIS):\n" << yimn ); \ +\ + check( yrmn, yrmn0 ); \ + check( yimn, yimn0 ); \ + } \ +\ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto yrmn = tile(); \ + auto yimn = tile(); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = tile(); \ + axpbys_mxn( a, bli_is_conj( conjx ) \ + ? conj( xmn ) \ + : xmn, \ + 0.0, ymn0, dense ); \ + auto yrmn0 = real( ymn0 ); \ + auto yimn0 = imag( ymn0 ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "a: " << a ); \ + INFO( "x:\n" << xmn ); \ +\ + bli_tscal2ris_mxn( cha,chx,chy,chc, \ + conjx, N, M, &a, \ + &xmn[0][0], 1, N, \ + &yrmn[0][0], 1, N, \ + &yimn[0][0] - &yrmn[0][0] ); \ +\ + INFO( "yr (C++):\n" << yrmn0 ); \ + INFO( "yi (C++):\n" << yimn0 ); \ + INFO( "yr (BLIS):\n" << yrmn ); \ + INFO( "yi (BLIS):\n" << yimn ); \ +\ + check( yrmn, yrmn0 ); \ + check( yimn, yimn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, C, C, R, scal2ris_mxn_separate ) diff --git a/test/level0/test_tscalcjs.cxx b/test/level0/test_tscalcjs.cxx new file mode 100644 index 0000000000..d4c4bba714 --- /dev/null +++ b/test/level0/test_tscalcjs.cxx @@ -0,0 +1,68 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * scalcjs + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( bli_is_conj( conjx ) ? conj( x ) : x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscalcjs( cha,chx,chc, conjx, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, scalcjs ) diff --git a/test/level0/test_tscals.cxx b/test/level0/test_tscals.cxx new file mode 100644 index 0000000000..dd8c550ec6 --- /dev/null +++ b/test/level0/test_tscals.cxx @@ -0,0 +1,277 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * scals + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscals( cha,chx,chc, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, scals ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( conj( a ) ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscaljs( cha,chx,chc, a, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) 
+ +INSERT_GENTFUNC_MIX3( RC, RC, R, scaljs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscalris( cha,chx,chc, \ + real( a ), imag( a ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, scalris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( convert_prec( conj( a ) ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + ctypex y = x; \ + bli_tscaljris( cha,chx,chc, \ + real( a ), imag( a ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, scaljris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto xri = x; \ + auto xir = swapri( conj( x ) ); \ +\ + auto xri0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ + auto xir0 = swapri( conj( xri0 ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "xri (orig): " << xri ); \ + INFO( "xir (orig): " << xir ); \ +\ + bli_tscal1es( cha,chx,chc, a, xri, xir ); \ +\ + INFO( "xri (C++): " << xri0 ); \ + INFO( "xir (C++): " << xir0 ); \ + INFO( "xri (BLIS): " << xri ); \ + INFO( "xir (BLIS): " << xir ); \ +\ + check( xri, xri0 ); \ + check( xir, xir0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, C, R, scal1es ) + +#undef 
GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto x0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x (orig): " << x ); \ +\ + bli_tscal1rs( cha,chx,chc, a, real( x ), imag( x ) ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, C, R, scal1rs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto diagoff : { -1, 0, 1 } ) \ + { \ + auto xmn = tile( x ); \ +\ + INFO( "row-major" ); \ +\ + std::function func = is_below( diagoff ); \ + if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ +\ + auto xmn0 = xmn; \ + axpbys_mxn( a, xmn, 0.0, xmn0, func ); \ +\ + INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ + INFO( "diagoff: " << diagoff ); \ + INFO( "a: " << a ); \ + INFO( "x (init):\n" << xmn ); \ +\ + bli_tscalris_mxn_uplo( cha,chx,chc, uplo, diagoff, M, N, \ + &real( a ), &real( a )+1, \ + &real( xmn[0][0] ), &real( xmn[0][0] )+1, \ + &real( xmn[1][0] ) - &real( xmn[0][0] ), \ + &real( xmn[0][1] ) - &real( xmn[0][0] ) ); \ +\ + INFO( "x (C++):\n" << xmn0 ); \ + INFO( "x (BLIS):\n" << xmn ); \ +\ + check( xmn, xmn0 ); \ + } \ +\ + for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto diagoff : { -1, 0, 1 } ) \ + { \ + auto xmn = tile( x ); \ +\ + INFO( "column-major" ); \ +\ + std::function func = is_below( diagoff ); \ + if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ +\ + auto xmn0 = xmn; \ + axpbys_mxn( a, xmn, 0.0, xmn0, func ); \ +\ + INFO( 
"upper: " << ( uplo == BLIS_UPPER ) ); \ + INFO( "diagoff: " << diagoff ); \ + INFO( "a: " << a ); \ + INFO( "x (init):\n" << xmn ); \ +\ + bli_tscalris_mxn_uplo( cha,chx,chc, uplo, diagoff, N, M, \ + &real( a ), &real( a )+1, \ + &real( xmn[0][0] ), &real( xmn[0][0] )+1, \ + &real( xmn[0][1] ) - &real( xmn[0][0] ), \ + &real( xmn[1][0] ) - &real( xmn[0][0] ) ); \ +\ + INFO( "x (C++):\n" << xmn0 ); \ + INFO( "x (BLIS):\n" << xmn ); \ +\ + check( xmn, xmn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, scalris_mxn_uplo ) diff --git a/test/level0/test_tsets.cxx b/test/level0/test_tsets.cxx new file mode 100644 index 0000000000..1480ec8243 --- /dev/null +++ b/test/level0/test_tsets.cxx @@ -0,0 +1,361 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * sets + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tsets( chx,chy, real( x ), imag( x ), y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, sets ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = convert_prec( real( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + bli_tsetrs( chx,chy, real( x ), y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, setrs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + imag( y0 ) = convert_prec( imag( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + bli_tsetis( chx,chy, imag( x ), y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + 
check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, setis ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( x ); \ +\ + INFO( "x: " << x ); \ +\ + ctypey y; \ + bli_tsetris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, setris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( 0.0 ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tset0s( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set0s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( 1.0 ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tset1s( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set1s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + real( y0 ) = convert_prec( 0.0 ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tsetr0s( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, setr0s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = y; \ + imag( y0 ) = convert_prec( 0.0 ); \ +\ + INFO( "y (init): " << y ); \ +\ + bli_tseti0s( chy, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, 
seti0s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( 0.0 ); \ +\ + bli_tset0ris( chy, real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set0ris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +UNIT_TEST(chy,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ + \ + for ( auto y : test_values() ) \ + { \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = tile( convert( 0.0 ) ); \ +\ + INFO( "y (init):\n" << ymn); \ +\ + bli_tset0s_mxn( chy, M, N, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ + \ + for ( auto y : test_values() ) \ + { \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = tile( convert( 0.0 ) ); \ +\ + INFO( "y (init):\n" << ymn ); \ +\ + bli_tset0s_mxn( chy, N, M, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set0s_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypey, chy ) \ +GENTFUNC0( opname, 1, ctypey, chy ) \ +GENTFUNC0( opname, 2, ctypey, chy ) \ +GENTFUNC0( opname, 5, ctypey, chy ) + +#undef GENTFUNC0 +#define GENTFUNC0( opname, D, ctypey, chy ) \ +UNIT_TEST(chy,PASTECH(opname,_,D)) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ + \ + for ( auto y : test_values() ) \ + { \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = tile( convert( 0.0 ) ); \ +\ + INFO( "y (init):\n" << ymn ); \ +\ + bli_tset0s_mxn( chy, N, M, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set0bbs_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, 
ctypey, chy ) \ +GENTFUNC0( opname, 10, 10, ctypey, chy ) \ +GENTFUNC0( opname, 10, 4, ctypey, chy ) \ +GENTFUNC0( opname, 4, 10, ctypey, chy ) \ +GENTFUNC0( opname, 10, 0, ctypey, chy ) \ +GENTFUNC0( opname, 0, 10, ctypey, chy ) \ +GENTFUNC0( opname, 4, 0, ctypey, chy ) \ +GENTFUNC0( opname, 0, 4, ctypey, chy ) \ +GENTFUNC0( opname, 0, 0, ctypey, chy ) + +#undef GENTFUNC0 +#define GENTFUNC0( opname, M, N, ctypey, chy ) \ +UNIT_TEST(chy,PASTECH(opname,_,M,_,N)) \ +( \ + constexpr auto M0 = 10; \ + constexpr auto N0 = 10; \ + \ + for ( auto y : test_values() ) \ + { \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = tile( convert( 0.0 ) ); \ + for ( auto i = 0; i < M; i++ ) \ + for ( auto j = 0; j < N; j++ ) \ + ymn0[i][j] = y; \ +\ + INFO( "y (init):\n" << ymn ); \ +\ + bli_tset0s_edge( chy, M, M0, N, N0, &ymn[0][0], M0 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, set0s_edge ) diff --git a/test/level0/test_tsqrt2s.cxx b/test/level0/test_tsqrt2s.cxx new file mode 100644 index 0000000000..070eea9e4e --- /dev/null +++ b/test/level0/test_tsqrt2s.cxx @@ -0,0 +1,86 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * sqrt2s + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( square_root( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tsqrt2s( chx,chy,chc, x, y ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( R, R, R, sqrt2s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto y0 = convert( square_root( convert_prec( x ) ) ); \ +\ + ctypey y; \ + bli_tsqrt2ris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "x: " << x ); \ + 
INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( R, R, R, sqrt2ris ) diff --git a/test/level0/test_tsubs.cxx b/test/level0/test_tsubs.cxx new file mode 100644 index 0000000000..10e27af4eb --- /dev/null +++ b/test/level0/test_tsubs.cxx @@ -0,0 +1,142 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * subs + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( y ) - \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tsubs( chx,chy,chc, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, subs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( y ) - \ + conj( convert_prec( x ) ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tsubjs( chx,chy,chc, x, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, subjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( 
convert_prec( y ) - \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tsubris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, subris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( y ) - \ + conj( convert_prec( x ) ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tsubjris( chx,chy,chc, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, subjris ) diff --git a/test/level0/test_tswaps.cxx b/test/level0/test_tswaps.cxx new file mode 100644 index 0000000000..9a0ff8103b --- /dev/null +++ b/test/level0/test_tswaps.cxx @@ -0,0 +1,98 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * swaps + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto x0 = convert( y ); \ + auto y0 = convert( x ); \ +\ + INFO( "x (init): " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tswaps( chx,chy, x, y ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "y (C++): " << y0 ); \ + INFO( "x (BLIS): " << x ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( x, x0 ); \ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, swaps ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ +UNIT_TEST(chx,chy,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto x0 = convert( y ); \ + auto y0 = convert( x ); \ +\ + INFO( "x (init): " << x ); \ + INFO( "y (init): " << y ); \ +\ + bli_tswapris( chx,chy, \ + real( x ), imag( x ), \ + real( y ), imag( y ) ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "y (C++): " 
<< y0 ); \ + INFO( "x (BLIS): " << x ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( x, x0 ); \ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, RC, swapris ) diff --git a/test/level0/test_txpbys.cxx b/test/level0/test_txpbys.cxx new file mode 100644 index 0000000000..d05ad4bcaf --- /dev/null +++ b/test/level0/test_txpbys.cxx @@ -0,0 +1,286 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "test_l0.hpp" + +/****************************************************************************** + * + * xpbys + * + *****************************************************************************/ + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_txpbys( chx,chb,chy,chc, x, b, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbys ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_txpbyjs( chx,chb,chy,chc, x, b, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbyjs ) + 
+#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_txpbyris( chx,chb,chy,chc, \ + real( x ), imag( x ), \ + real( b ), imag( b ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbyris ) + +// txpbyjris +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( conj( convert_prec( x ) ) + \ + convert_prec( b ) * \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ + INFO( "y (init): " << y ); \ +\ + bli_txpbyjris( chx,chb,chy,chc, \ + real( x ), imag( x ), \ + real( b ), imag( b ), \ + real( y ), imag( y ) ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbyjris ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, b, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "b: " << b ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_txpbys_mxn( chx,chb,chy,chc, M, N, 
&xmn[0][0], N, 1, &b, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, b, ymn0, dense ); \ +\ + INFO( "x:\n" << xmn ); \ + INFO( "b: " << b ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_txpbys_mxn( chx,chb,chy,chc, N, M, &xmn[0][0], 1, N, &b, &ymn[0][0], 1, N ); \ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbys_mxn ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + constexpr auto M = 4; \ + constexpr auto N = 4; \ +\ + for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + for ( auto diagoff : { -1, 0, 1 } ) \ + { \ + auto xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "row-major" ); \ +\ + std::function func = is_below( diagoff ); \ + if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, b, ymn0, func ); \ +\ + INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ + INFO( "diagoff: " << diagoff ); \ + INFO( "x:\n" << xmn ); \ + INFO( "b: " << b ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_txpbys_mxn_uplo( chx,chb,chy,chc, diagoff, uplo, M, N, &xmn[0][0], N, 1, &b, &ymn[0][0], N, 1 ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +\ + for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + for ( auto y : test_values() ) \ + for ( auto diagoff : { -1, 0, 1 } ) \ + { \ + auto 
xmn = tile( x ); \ + auto ymn = tile( y ); \ +\ + INFO( "column-major" ); \ +\ + std::function func = is_below( diagoff ); \ + if ( uplo == BLIS_UPPER ) func = is_above( diagoff ); \ +\ + auto ymn0 = ymn; \ + axpbys_mxn( 1.0, xmn, b, ymn0, func ); \ +\ + INFO( "upper: " << ( uplo == BLIS_UPPER ) ); \ + INFO( "diagoff: " << diagoff ); \ + INFO( "x:\n" << xmn ); \ + INFO( "b: " << b ); \ + INFO( "y (init):\n" << ymn ); \ +\ + bli_txpbys_mxn_uplo( chx,chb,chy,chc, diagoff, uplo, N, M, &xmn[0][0], 1, N, &b, &ymn[0][0], 1, N ); \ +\ + INFO( "y (C++):\n" << ymn0 ); \ + INFO( "y (BLIS):\n" << ymn ); \ +\ + check( ymn, ymn0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbys_mxn_uplo ) From 9c00ded4e8e141452c9e5503ee1313e5f377bebf Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 4 Nov 2024 16:53:50 -0600 Subject: [PATCH 08/19] Fix remaining errors in level-0 macros and tests. --- frame/include/level0/bli_taxpbys.h | 19 +++++-- frame/include/level0/bli_tinverts.h | 2 +- frame/include/level0/bli_tscal2s.h | 62 +++++++++++++++------ frame/include/level0/bli_tscals.h | 2 +- test/level0/Makefile | 4 +- test/level0/test_l0.cxx | 8 ++- test/level0/test_l0.hpp | 86 +++++++++++++++-------------- test/level0/test_tadd3s.cxx | 41 ++++++++++++++ test/level0/test_taxpbys.cxx | 28 ++++++++++ test/level0/test_taxpys.cxx | 25 +++++++++ test/level0/test_tconjs.cxx | 4 +- test/level0/test_tcopycjs.cxx | 27 ++++++++- test/level0/test_tcopynzs.cxx | 8 +-- test/level0/test_tcopys.cxx | 41 ++++++++++---- test/level0/test_tneg2s.cxx | 25 ++++++++- test/level0/test_tscal2s.cxx | 28 +++++++++- test/level0/test_tscalcjs.cxx | 5 +- test/level0/test_tsets.cxx | 8 +-- 18 files changed, 324 insertions(+), 99 deletions(-) diff --git a/frame/include/level0/bli_taxpbys.h b/frame/include/level0/bli_taxpbys.h index 76dd8d6b3d..6bfe6dfa9b 100644 --- a/frame/include/level0/bli_taxpbys.h +++ b/frame/include/level0/bli_taxpbys.h @@ -51,9 +51,9 @@ chc \ ) \ { \ - PASTEMAC(dy,declinits)( py, yr, 
yi, yorigr, yorigi ) \ - PASTEMAC(dy,assigns) \ + PASTEMAC(c,declinits) \ ( \ + py, \ PASTEMAC(chc,py,tcast)( \ PASTEMAC(chc,add)( \ PASTEMAC(chc,sub)( \ @@ -77,14 +77,14 @@ chc, \ PASTEMAC(chc,mul)( \ PASTEMAC(pb,chc,tcast)(br), \ - PASTEMAC(py,chc,tcast)(yorigr) \ + PASTEMAC(py,chc,tcast)(yr) \ ) \ ), \ PASTEMAC(db,dy,termii)( \ chc, \ PASTEMAC(chc,mul)( \ PASTEMAC(pb,chc,tcast)(bi), \ - PASTEMAC(py,chc,tcast)(yorigi) \ + PASTEMAC(py,chc,tcast)(yi) \ ) \ ) \ ) \ @@ -113,19 +113,26 @@ chc, \ PASTEMAC(chc,mul)( \ PASTEMAC(pb,chc,tcast)(bi), \ - PASTEMAC(py,chc,tcast)(yorigr) \ + PASTEMAC(py,chc,tcast)(yr) \ ) \ ), \ PASTEMAC(db,dy,termri)( \ chc, \ PASTEMAC(chc,mul)( \ PASTEMAC(pb,chc,tcast)(br), \ - PASTEMAC(py,chc,tcast)(yorigi) \ + PASTEMAC(py,chc,tcast)(yi) \ ) \ ) \ ) \ ) \ ), \ + tr, \ + ti \ + ); \ + PASTEMAC(dy,assigns) \ + ( \ + tr, \ + ti, \ yr, \ yi \ ); \ diff --git a/frame/include/level0/bli_tinverts.h b/frame/include/level0/bli_tinverts.h index ec8698298e..c797fa3f18 100644 --- a/frame/include/level0/bli_tinverts.h +++ b/frame/include/level0/bli_tinverts.h @@ -177,7 +177,7 @@ // -- 1e / 1r -- // invert1es -#define bli_tinvert1es( chx, chc, xir, xri ) \ +#define bli_tinvert1es( chx, chc, xri, xir ) \ bli_tinvertims \ ( \ PASTEMAC(chx,dom), \ diff --git a/frame/include/level0/bli_tscal2s.h b/frame/include/level0/bli_tscal2s.h index 5dc4e25e5b..4b0b7e7da1 100644 --- a/frame/include/level0/bli_tscal2s.h +++ b/frame/include/level0/bli_tscal2s.h @@ -317,23 +317,49 @@ const dim_t d = incy; \ const dim_t ds_y = 1; \ \ - for ( dim_t j = 0; j < (n); ++j ) \ + if ( bli_is_conj( conjx ) ) \ { \ - ctypex* restrict xj = (ctypex*)(x) + j*(ldx); \ - ctypey* restrict yj = (ctypey*)(y) + j*(ldy); \ -\ - for ( dim_t i = 0; i < (m); ++i ) \ + for ( dim_t j = 0; j < (n); ++j ) \ { \ - ctypex* restrict xij = xj + i*(incx); \ - ctypey* restrict yij = yj + i*(incy); \ -\ - bli_tscal2s( cha,chx,chy,chc, *(const ctypea* restrict)(alpha), *xij, *yij ); \ -\ - for ( dim_t p = 1; 
p < d; ++p ) \ + ctypex* restrict xj = (ctypex*)(x) + j*(ldx); \ + ctypey* restrict yj = (ctypey*)(y) + j*(ldy); \ + \ + for ( dim_t i = 0; i < (m); ++i ) \ { \ - ctypey* restrict yijd = yij + p*ds_y; \ -\ - bli_tcopys( chy,chy, *yij, *yijd ); \ + ctypex* restrict xij = xj + i*(incx); \ + ctypey* restrict yij = yj + i*(incy); \ + \ + bli_tscal2js( cha,chx,chy,chc, *(const ctypea* restrict)(alpha), *xij, *yij ); \ + \ + for ( dim_t p = 1; p < d; ++p ) \ + { \ + ctypey* restrict yijd = yij + p*ds_y; \ + \ + bli_tcopys( chy,chy, *yij, *yijd ); \ + } \ + } \ + } \ + } \ + else \ + { \ + for ( dim_t j = 0; j < (n); ++j ) \ + { \ + ctypex* restrict xj = (ctypex*)(x) + j*(ldx); \ + ctypey* restrict yj = (ctypey*)(y) + j*(ldy); \ + \ + for ( dim_t i = 0; i < (m); ++i ) \ + { \ + ctypex* restrict xij = xj + i*(incx); \ + ctypey* restrict yij = yj + i*(incy); \ + \ + bli_tscal2s( cha,chx,chy,chc, *(const ctypea* restrict)(alpha), *xij, *yij ); \ + \ + for ( dim_t p = 1; p < d; ++p ) \ + { \ + ctypey* restrict yijd = yij + p*ds_y; \ + \ + bli_tcopys( chy,chy, *yij, *yijd ); \ + } \ } \ } \ } \ @@ -356,11 +382,11 @@ const dim_t d = incy; \ const dim_t ds_y = 1; \ \ - const inc_t incx2 = 2 * (incx); \ - const inc_t ldx2 = 2 * (ldx); \ + const inc_t incx2 = sizeof(ctypex) / sizeof(ctypex_r) * (incx); \ + const inc_t ldx2 = sizeof(ctypex) / sizeof(ctypex_r) * (ldx); \ \ - const inc_t incy2 = 2 * (incy); \ - const inc_t ldy2 = 2 * (ldy); \ + const inc_t incy2 = sizeof(ctypey) / sizeof(ctypey_r) * (incy); \ + const inc_t ldy2 = sizeof(ctypey) / sizeof(ctypey_r) * (ldy); \ \ ctypea_r* restrict alpha_r = ( ctypea_r* )(alpha); \ ctypea_r* restrict alpha_i = ( ctypea_r* )(alpha) + 1; (void)alpha_i; \ diff --git a/frame/include/level0/bli_tscals.h b/frame/include/level0/bli_tscals.h index eaa2d9b986..f0d5e18c5a 100644 --- a/frame/include/level0/bli_tscals.h +++ b/frame/include/level0/bli_tscals.h @@ -168,7 +168,7 @@ // -- 1e / 1r -- // scal1es -#define bli_tscal1es( cha, chx, chc, a, 
xir, xri ) \ +#define bli_tscal1es( cha, chx, chc, a, xri, xir ) \ bli_tscalims \ ( \ PASTEMAC(cha,dom), \ diff --git a/test/level0/Makefile b/test/level0/Makefile index 70ead01de8..1af6dbfb83 100644 --- a/test/level0/Makefile +++ b/test/level0/Makefile @@ -106,10 +106,10 @@ CXXFLAGS := $(call get-frame-cxxflags-for,$(CONFIG_NAME)) # Add installed and local header paths to CFLAGS CFLAGS += -I$(TEST_SRC_PATH) -CXXFLAGS += -I$(TEST_SRC_PATH) +CXXFLAGS += -I$(TEST_SRC_PATH) -DENABLE_INFO HDR_SUFFIXES := h hpp -HEADERS := $(foreach suf, $(HDR_SUFFIXES), $(wildcard $(TEST_SRC_PATH)/*.$(suf))) +HEADERS := $(foreach suf, $(HDR_SUFFIXES), $(wildcard $(TEST_SRC_PATH)/*.$(suf))) $(INC_PATH)/blis.h # diff --git a/test/level0/test_l0.cxx b/test/level0/test_l0.cxx index a461d44a22..86af4e0486 100644 --- a/test/level0/test_l0.cxx +++ b/test/level0/test_l0.cxx @@ -35,7 +35,13 @@ #include "test_l0.hpp" +unit_test_registrar& get_unit_test_registrar() +{ + static unit_test_registrar registrar; + return registrar; +} + int main() { - get_unit_test_registrar().run_tests(); + return get_unit_test_registrar().run_tests(); /* run_tests() is true when any test failed, so the exit status is nonzero on failure */ } diff --git a/test/level0/test_l0.hpp b/test/level0/test_l0.hpp index 0d93237538..710c592c41 100644 --- a/test/level0/test_l0.hpp +++ b/test/level0/test_l0.hpp @@ -63,34 +63,34 @@ struct unit_test_registrar std::vector tests; std::vector vars; - static const std::string& red() + static const char* red() { #ifdef BLIS_OS_WINDOWS static std::string s = _isatty(_fileno(stdout)) ? "\e[0;31m" : ""; #else static std::string s = isatty(fileno(stdout)) ? "\e[0;31m" : ""; #endif - return s; + return s.c_str(); } - static const std::string& green() + static const char* green() { #ifdef BLIS_OS_WINDOWS static std::string s = _isatty(_fileno(stdout)) ? "\e[0;32m" : ""; #else static std::string s = isatty(fileno(stdout)) ?
"\e[0;32m" : ""; #endif - return s; + return s.c_str(); } - static const std::string& normal() + static const char* normal() { #ifdef BLIS_OS_WINDOWS static std::string s = _isatty(_fileno(stdout)) ? "\e[0m" : ""; #else static std::string s = isatty(fileno(stdout)) ? "\e[0m" : ""; #endif - return s; + return s.c_str(); } size_t register_test(unit_test_t test) @@ -99,7 +99,7 @@ struct unit_test_registrar return tests.size()-1; } - void run_tests() + bool run_tests() { auto failed = 0; auto total = 0; @@ -120,9 +120,11 @@ struct unit_test_registrar printf("\n"); printf("Total tests: %d\n", total); - printf("%sPassed: %d (%.1f%%)%s\n", green().c_str(), total-failed, 100.0*(total-failed)/total, normal().c_str()); + printf("%sPassed: %d (%.1f%%)%s\n", green(), total-failed, 100.0*(total-failed)/total, normal()); if (failed) - printf("%sFailed: %d (%.1f%%)%s\n\n", red().c_str(), failed, 100.0*failed/total, normal().c_str()); + printf("%sFailed: %d (%.1f%%)%s\n\n", red(), failed, 100.0*failed/total, normal()); + + return failed; } void push_var(const variable_printer_base* var) @@ -139,7 +141,7 @@ struct unit_test_registrar [[noreturn]] void fail(const char* cond) { - printf("%sFAILURE%s\n\n", red().c_str(), normal().c_str()); + printf("%sFAILURE%s\n\n", red(), normal()); for (auto& var : vars) var->print(); @@ -153,13 +155,9 @@ struct unit_test_registrar } }; -static unit_test_registrar& get_unit_test_registrar() -{ - static unit_test_registrar registrar; - return registrar; -} +unit_test_registrar& get_unit_test_registrar(); -static size_t register_unit_test(unit_test_t test) +inline size_t register_unit_test(unit_test_t test) { return get_unit_test_registrar().register_test(test); } @@ -175,7 +173,7 @@ struct variable_printer : variable_printer_base get_unit_test_registrar().push_var(this); } - virtual ~variable_printer() + virtual ~variable_printer() override { get_unit_test_registrar().pop_var(this); } @@ -259,7 +257,7 @@ VAR_NAME(id) << __VA_ARGS__; #define 
TEST_CASE_(id,name) \ extern "C" void TEST_NAME(id,name)(); \ -static auto TEST_ID(id,name) = register_unit_test(TEST_NAME(id,name)); \ +auto TEST_ID(id,name) = register_unit_test(TEST_NAME(id,name)); \ void TEST_NAME(id,name)() #define TEST_CASE(name) TEST_CASE_(__LINE__,name) @@ -345,7 +343,7 @@ TEST_CASE(ch1##ch2##ch3##ch4##ch5##opname) \ #define UNIT_TEST_BODY( ... ) \ __VA_ARGS__; \ - printf("%sPASS%s\n", unit_test_registrar::green().c_str(), unit_test_registrar::normal().c_str()); \ + printf("%sPASS%s\n", unit_test_registrar::green(), unit_test_registrar::normal()); \ } #define UNIT_TEST_SELECTOR_( ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7, ... ) ARG7 @@ -613,7 +611,7 @@ inline bool bli_isinf( double x ) { return bli_disinf( x ); } template std::enable_if_t::value> check(T x, T y) { - auto tol = 2*std::numeric_limits>::epsilon(); + auto tol = 8*std::numeric_limits>::epsilon(); INFO("x: " << x); INFO("y: " << y); INFO("|x-y|: " << std::abs(x-y)); @@ -629,19 +627,23 @@ std::enable_if_t::value> check(T x, T y) template std::enable_if_t::value> check(const T& x, const T& y) { - INFO("Real part:"); - check( x.real, y.real ); - INFO("Imag part:"); - check( x.imag, y.imag ); + { + INFO("Real part:"); + check( x.real, y.real ); + } + { + INFO("Imag part:"); + check( x.imag, y.imag ); + } } template std::enable_if_t::value,std::vector> test_values(int mask = BLIS_TEST_DEFAULT) { - std::vector vals{1.439}; + std::vector vals{0.439}; if (mask & BLIS_TEST_NEGATIVE) - vals.push_back(-2.563); + vals.push_back(-0.563); if (mask & BLIS_TEST_ZERO) vals.push_back(0); @@ -717,35 +719,35 @@ std::array,N>,M> imag(const std::array } template -std::enable_if_t::value,std::array,M*D>> +std::enable_if_t::value,std::array,M>> bcast(const std::array,M>& x) { - std::array,D*M> ret; + std::array,M> ret; for (size_t d = 0;d < D;d++) for (size_t i = 0;i < M;i++) for (size_t j = 0;j < N;j++) - ret[d + i*D][j] = x[i][j]; + ret[i][d + j*D] = x[i][j]; return ret; } template 
-std::enable_if_t::value,std::array,M*D>> +std::enable_if_t::value,std::array,M>> bcast(const std::array,M>& x) { - std::array,N>,2*D*M> ret_r; - std::array,D*M> ret; + std::array,2*D*N>,M> ret_r; + std::array,M> ret; for (size_t d = 0;d < D;d++) for (size_t i = 0;i < M;i++) for (size_t j = 0;j < N;j++) { - ret_r[d + i*D + 0*D*M][j] = real(x[i][j]); - ret_r[d + i*D + 1*D*M][j] = imag(x[i][j]); + ret_r[i][d + 0*D + j*2*D] = real(x[i][j]); + ret_r[i][d + 1*D + j*2*D] = imag(x[i][j]); } - for (size_t i = 0;i < D*M;i++) - for (size_t j = 0;j < N;j++) + for (size_t i = 0;i < M;i++) + for (size_t j = 0;j < D*N;j++) { - real(ret[i][j]) = ret_r[i*2+0][j]; - imag(ret[i][j]) = ret_r[i*2+1][j]; + real(ret[i][j]) = ret_r[i][j*2+0]; + imag(ret[i][j]) = ret_r[i][j*2+1]; } return ret; } @@ -800,10 +802,10 @@ void axpbys_mxn(const A& a, const std::array,M>& x, y[i][j] = convert(convert_prec(a) * convert_prec(x[i][j])); else - y[i][j] = convert(convert_prec(a) * - convert_prec(x[i][j]) + - convert_prec(b) * - convert_prec(y[i][j])); + y[i][j] = convert((convert_prec(a) * + convert_prec(x[i][j])) + + (convert_prec(b) * + convert_prec(y[i][j]))); } } diff --git a/test/level0/test_tadd3s.cxx b/test/level0/test_tadd3s.cxx index a4a96962b8..5933cabe93 100644 --- a/test/level0/test_tadd3s.cxx +++ b/test/level0/test_tadd3s.cxx @@ -66,6 +66,47 @@ UNIT_TEST(chx,chy,chz,chc,opname) \ INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3s); +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) /* in-place variant: only x, y and the computation type are macro parameters */ \ +( \ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto y0 = convert( convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + bli_tadd3s( chx,chy,chy,chc, x, y, y ); \ +\ + INFO( "y (C++): " << y0 ); \ + INFO( "y (BLIS): " << y ); \ +\ + check( y, y0 ); \ + } \ +\ + for ( auto x : test_values() ) \ + for ( auto y : test_values() ) \ + { \ + auto x0 = convert(
convert_prec( x ) + \ + convert_prec( y ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "y: " << y ); \ +\ + bli_tadd3s( chx,chy,chx,chc, x, y, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3(RC, RC, C, add3s_inplace); + #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ UNIT_TEST(chx,chy,chz,chc,opname) \ diff --git a/test/level0/test_taxpbys.cxx b/test/level0/test_taxpbys.cxx index 57b90ea2cb..d595f25058 100644 --- a/test/level0/test_taxpbys.cxx +++ b/test/level0/test_taxpbys.cxx @@ -71,6 +71,34 @@ UNIT_TEST(cha,chx,chb,chy,chc,opname) \ INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbys ) +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypec, chc ) \ +UNIT_TEST(cha,chx,chb,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( auto b : test_values() ) \ + { \ + auto x0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ +\ + bli_taxpbys( cha,chx,chb,chx,chc, a, x, b, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpbys_inplace ) + #undef GENTFUNC #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chb,chy,chc,opname) \ diff --git a/test/level0/test_taxpys.cxx b/test/level0/test_taxpys.cxx index 6e3b3d3886..bf80277977 100644 --- a/test/level0/test_taxpys.cxx +++ b/test/level0/test_taxpys.cxx @@ -68,6 +68,31 @@ UNIT_TEST(cha,chx,chy,chc,opname) \ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpys ) +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + 
auto x0 = convert( convert_prec( a ) * \ + convert_prec( x ) + \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + bli_taxpys( cha,chx,chx,chc, a, x, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, axpys_inplace ) + #undef GENTFUNC #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ diff --git a/test/level0/test_tconjs.cxx b/test/level0/test_tconjs.cxx index 2216127f48..6083297f8d 100644 --- a/test/level0/test_tconjs.cxx +++ b/test/level0/test_tconjs.cxx @@ -60,7 +60,7 @@ UNIT_TEST(chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX1( C, conjs ) +INSERT_GENTFUNC_MIX1( RC, conjs ) #undef GENTFUNC #define GENTFUNC( opname, ctypey, chy ) \ @@ -81,4 +81,4 @@ UNIT_TEST(chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX1( C, conjris ) +INSERT_GENTFUNC_MIX1( RC, conjris ) diff --git a/test/level0/test_tcopycjs.cxx b/test/level0/test_tcopycjs.cxx index bfc58edfd2..b113e51590 100644 --- a/test/level0/test_tcopycjs.cxx +++ b/test/level0/test_tcopycjs.cxx @@ -63,7 +63,30 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copycjs ) +INSERT_GENTFUNC_MIX2( RC, RC, copycjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto x : test_values() ) \ + { \ + auto x0 = convert( bli_is_conj( conjx ) ? 
conj( x ) : x ); \ +\ + INFO( "conjx: " << bli_is_conj( conjx ) ); \ + INFO( "x: " << x ); \ +\ + bli_tcopycjs( chx,chx, conjx, x, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, copycjs_inplace ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -89,4 +112,4 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copycjris ) +INSERT_GENTFUNC_MIX2( RC, RC, copycjris ) diff --git a/test/level0/test_tcopynzs.cxx b/test/level0/test_tcopynzs.cxx index de0d84c951..ce0b1b4814 100644 --- a/test/level0/test_tcopynzs.cxx +++ b/test/level0/test_tcopynzs.cxx @@ -65,7 +65,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copynzs ) +INSERT_GENTFUNC_MIX2( RC, RC, copynzs ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -91,7 +91,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copyjnzs ) +INSERT_GENTFUNC_MIX2( RC, RC, copyjnzs ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -119,7 +119,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copynzris ) +INSERT_GENTFUNC_MIX2( RC, RC, copynzris ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -147,4 +147,4 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copyjnzris ) +INSERT_GENTFUNC_MIX2( RC, RC, copyjnzris ) diff --git a/test/level0/test_tcopys.cxx b/test/level0/test_tcopys.cxx index 111c077739..a805c93e98 100644 --- a/test/level0/test_tcopys.cxx +++ b/test/level0/test_tcopys.cxx @@ -61,7 +61,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copys ) +INSERT_GENTFUNC_MIX2( RC, RC, copys ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -83,7 +83,28 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copyjs ) +INSERT_GENTFUNC_MIX2( RC, RC, copyjs ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx 
) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto x0 = convert( conj( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + bli_tcopyjs( chx,chx, x, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, copyjs_inplace ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -107,7 +128,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copyris ) +INSERT_GENTFUNC_MIX2( RC, RC, copyris ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -131,7 +152,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copyjris ) +INSERT_GENTFUNC_MIX2( RC, RC, copyjris ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -157,7 +178,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( C, R, copy1es ) +INSERT_GENTFUNC_MIX2( C, C, copy1es ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -183,7 +204,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( C, R, copyj1es ) +INSERT_GENTFUNC_MIX2( C, C, copyj1es ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -205,7 +226,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( C, R, copy1rs ) +INSERT_GENTFUNC_MIX2( C, C, copy1rs ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -218,7 +239,7 @@ UNIT_TEST(chx,chy,opname) \ INFO( "x: " << x ); \ \ ctypey y; \ - bli_tcopy1rs( chx,chy, x, real( y ), imag( y ) ); \ + bli_tcopyj1rs( chx,chy, x, real( y ), imag( y ) ); \ \ INFO( "y (C++): " << y0 ); \ INFO( "y (BLIS): " << y ); \ @@ -227,7 +248,7 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( C, R, copyj1rs ) +INSERT_GENTFUNC_MIX2( C, C, copyj1rs ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -277,4 +298,4 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, copys_mxn ) +INSERT_GENTFUNC_MIX2( RC, RC, 
copys_mxn ) diff --git a/test/level0/test_tneg2s.cxx b/test/level0/test_tneg2s.cxx index c70f6270a9..28aa3e1788 100644 --- a/test/level0/test_tneg2s.cxx +++ b/test/level0/test_tneg2s.cxx @@ -61,7 +61,28 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, neg2s ) +INSERT_GENTFUNC_MIX2( RC, RC, neg2s ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx ) \ +UNIT_TEST(chx,opname) \ +( \ + for ( auto x : test_values() ) \ + { \ + auto x0 = -x; \ +\ + INFO( "x: " << x ); \ +\ + bli_tneg2s( chx,chx, x, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX1( RC, neg2s_inplace ) #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ @@ -85,4 +106,4 @@ UNIT_TEST(chx,chy,opname) \ } \ ) -INSERT_GENTFUNC_MIX2( RC, R, neg2ris ) +INSERT_GENTFUNC_MIX2( RC, RC, neg2ris ) diff --git a/test/level0/test_tscal2s.cxx b/test/level0/test_tscal2s.cxx index bcc0e105eb..1cacf7fef0 100644 --- a/test/level0/test_tscal2s.cxx +++ b/test/level0/test_tscal2s.cxx @@ -66,6 +66,30 @@ UNIT_TEST(cha,chx,chy,chc,opname) \ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2s ) +#undef GENTFUNC +#define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ +UNIT_TEST(cha,chx,chc,opname) \ +( \ + for ( auto a : test_values() ) \ + for ( auto x : test_values() ) \ + { \ + auto x0 = convert( convert_prec( a ) * \ + convert_prec( x ) ); \ +\ + INFO( "a: " << a ); \ + INFO( "x: " << x ); \ +\ + bli_tscal2s( cha,chx,chx,chc, a, x, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, scal2s_inplace ) + #undef GENTFUNC #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ @@ -274,7 +298,7 @@ UNIT_TEST(cha,chx,chy,chc,PASTECH(opname,_,D)) \ { \ auto xmn = tile( x ); \ auto ymn00 = tile(); \ - auto ymn = tile(); \ + auto ymn = tile(); \ \ INFO("column-major"); 
\ \ @@ -285,7 +309,7 @@ UNIT_TEST(cha,chx,chy,chc,PASTECH(opname,_,D)) \ INFO( "a: " << a ); \ INFO( "x:\n" << xmn ); \ \ - bli_tscal2bbs_mxn( cha,chx,chy,chc, conjx, N, M, &a, &xmn[0][0], 1, N, &ymn[0][0], D, N ); \ + bli_tscal2bbs_mxn( cha,chx,chy,chc, conjx, N, M, &a, &xmn[0][0], 1, N, &ymn[0][0], D, D*N ); \ \ INFO( "y (C++):\n" << ymn0 ); \ INFO( "y (BLIS):\n" << ymn ); \ diff --git a/test/level0/test_tscalcjs.cxx b/test/level0/test_tscalcjs.cxx index d4c4bba714..058f64c7b6 100644 --- a/test/level0/test_tscalcjs.cxx +++ b/test/level0/test_tscalcjs.cxx @@ -49,9 +49,10 @@ UNIT_TEST(cha,chx,chc,opname) \ for ( auto a : test_values() ) \ for ( auto x : test_values() ) \ { \ - auto y0 = convert( convert_prec( a ) * \ - convert_prec( bli_is_conj( conjx ) ? conj( x ) : x ) ); \ + auto y0 = convert( convert_prec( bli_is_conj( conjx ) ? conj( a ) : a ) * \ + convert_prec( x ) ); \ \ + INFO( "conjx: " << conjx ); \ INFO( "a: " << a ); \ INFO( "x: " << x ); \ \ diff --git a/test/level0/test_tsets.cxx b/test/level0/test_tsets.cxx index 1480ec8243..b5f5c789e4 100644 --- a/test/level0/test_tsets.cxx +++ b/test/level0/test_tsets.cxx @@ -299,15 +299,15 @@ UNIT_TEST(chy,PASTECH(opname,_,D)) \ \ for ( auto y : test_values() ) \ { \ - auto ymn = tile( y ); \ + auto ymn = tile( y ); \ \ INFO( "column-major" ); \ \ - auto ymn0 = tile( convert( 0.0 ) ); \ + auto ymn0 = tile( convert( 0.0 ) ); \ \ INFO( "y (init):\n" << ymn ); \ \ - bli_tset0s_mxn( chy, N, M, &ymn[0][0], 1, N ); \ + bli_tset0bbs_mxn( chy, N, M, &ymn[0][0], D, D*N ); \ \ INFO( "y (C++):\n" << ymn0 ); \ INFO( "y (BLIS):\n" << ymn ); \ @@ -349,7 +349,7 @@ UNIT_TEST(chy,PASTECH(opname,_,M,_,N)) \ \ INFO( "y (init):\n" << ymn ); \ \ - bli_tset0s_edge( chy, M, M0, N, N0, &ymn[0][0], M0 ); \ + bli_tset0s_edge( chy, N, N0, M, M0, &ymn[0][0], N0 ); \ \ INFO( "y (C++):\n" << ymn0 ); \ INFO( "y (BLIS):\n" << ymn ); \ From 15d36b1ee29f4e1603f1537d3232320b25ae7523 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 4 Nov 
2024 17:54:24 -0600 Subject: [PATCH 09/19] Work around preprocessor bug in gcc 11 and older. --- ref_kernels/1m/bli_packm_cxk_ref.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ref_kernels/1m/bli_packm_cxk_ref.c b/ref_kernels/1m/bli_packm_cxk_ref.c index 11b692af35..6b70ddda70 100644 --- a/ref_kernels/1m/bli_packm_cxk_ref.c +++ b/ref_kernels/1m/bli_packm_cxk_ref.c @@ -34,6 +34,15 @@ #include "blis.h" +// Apparently gcc 11 and older have a bug where the _Pragma +// erroneously moves to the beginning of the entire macro +// body (e.g. just before "do") +#ifdef __GNUC__ +#if __GNUC__ < 12 +#undef PRAGMA_SIMD +#define PRAGMA_SIMD +#endif +#endif #define PACKM_BODY_r( ctypea, ctypep, cha, chp, pragma, cdim, dfac, inca, op ) \ \ From 97da69cd38431659ba302db60ca6337b7a5ed5fd Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 15 Nov 2024 15:00:39 -0600 Subject: [PATCH 10/19] Update Multithreading.md Revert typo in docs. [ci skip] --- docs/Multithreading.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Multithreading.md b/docs/Multithreading.md index f9ba29dad3..d8f8b13f40 100644 --- a/docs/Multithreading.md +++ b/docs/Multithreading.md @@ -38,7 +38,7 @@ To summarize: In order to observe multithreaded parallelism within a BLIS operat BLIS disables multithreading by default. In order to allow multithreaded parallelism from BLIS, you must first enable multithreading explicitly at configure-time. -As of this writing, BLIS optionally supports multithreading via OpenMP or POSIX bli_threads(or both). +As of this writing, BLIS optionally supports multithreading via OpenMP or POSIX threads(or both). To enable multithreading via OpenMP, you must provide the `--enable-threading` option to the `configure` script: ``` From 7eb841cb20c565e683c8379906f13f63bedfa16c Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sun, 24 Nov 2024 19:46:12 -0600 Subject: [PATCH 11/19] Comment update. 
--- frame/include/level0/bli_tdots.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/frame/include/level0/bli_tdots.h b/frame/include/level0/bli_tdots.h index e1f9e76075..4a4b0f46b3 100644 --- a/frame/include/level0/bli_tdots.h +++ b/frame/include/level0/bli_tdots.h @@ -44,8 +44,12 @@ // -- API macros --------------------------------------------------------------- -// NOTE: The first two operands must be swapped, because axpy conjugates -// x (the first operand), while dot conjugates y (the second operand). +// NOTE: When defining the tdots macros, we can recycle taxpys since they both +// perform c += a * b. However, when invoking taxpys, the first two operands +// passed in must be swapped because in BLIS axpy is set up to conjugate its +// second operand (ie: the second operand to the a*x product) while dot +// is set up to conjugate its first operand (ie: the first operand to the x*y +// product). // -- Consolidated -- From ab6172476b3734c78a187ad7a48acdefd982f8d4 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 22 Jan 2025 11:58:58 -0600 Subject: [PATCH 12/19] Undo changes to optimized kernels and vendor code. A compatibility layer will be added. 
--- frame/2/gemv/amd/bli_gemv_unf_var2_amd.c | 4 +- frame/compat/amd/bla_gemv_amd.c | 2 +- .../armsve/1m/bli_dpackm_armsve256_int_8x10.c | 10 ++-- .../1m/bli_dpackm_armsve512_asm_16x10.c | 10 ++-- kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c | 10 ++-- .../armv8a/1m/bli_packm_armv8a_int_s8x12.c | 10 ++-- kernels/bgq/1/bli_dotv_bgq_int.c | 2 +- kernels/bgq/1f/bli_axpyf_bgq_int.c | 16 ++--- .../haswell/1m/bli_packm_haswell_asm_c3x8.c | 10 ++-- .../haswell/1m/bli_packm_haswell_asm_d6x8.c | 10 ++-- .../haswell/1m/bli_packm_haswell_asm_s6x16.c | 10 ++-- .../haswell/1m/bli_packm_haswell_asm_z3x4.c | 10 ++-- .../sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c | 14 ++--- .../s6x16/bli_gemmsup_r_haswell_ref_sMx1.c | 14 ++--- kernels/knl/1m/bli_dpackm_knl_asm_24x8.c | 8 +-- kernels/knl/1m/bli_spackm_knl_asm_24x16.c | 8 +-- kernels/penryn/1/bli_dotv_penryn_int.c | 6 +- kernels/penryn/1f/bli_axpyf_penryn_int.c | 8 +-- kernels/penryn/1f/bli_dotaxpyv_penryn_int.c | 4 +- kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c | 18 +++--- kernels/penryn/1f/bli_dotxf_penryn_int.c | 26 ++++----- kernels/zen/1/bli_amaxv_zen_int.c | 44 +++++++------- kernels/zen/1/bli_axpyv_zen_int.c | 4 +- kernels/zen/1/bli_axpyv_zen_int10.c | 4 +- kernels/zen/1/bli_dotv_zen_int.c | 12 ++-- kernels/zen/1/bli_dotv_zen_int10.c | 12 ++-- kernels/zen/1/bli_dotxv_zen_int.c | 20 +++---- kernels/zen/1/bli_scalv_zen_int.c | 8 +-- kernels/zen/1/bli_scalv_zen_int10.c | 8 +-- kernels/zen/1/bli_swapv_zen_int8.c | 8 +-- kernels/zen/1f/bli_axpyf_zen_int_4.c | 22 +++---- kernels/zen/1f/bli_axpyf_zen_int_5.c | 58 +++++++++---------- kernels/zen/1f/bli_axpyf_zen_int_8.c | 44 +++++++------- kernels/zen/1f/bli_dotxf_zen_int_8.c | 8 +-- kernels/zen/3/bli_gemmt_small.c | 56 +++++++++--------- vendor/testcpp/test_sdsdot.cc | 10 ++-- 36 files changed, 254 insertions(+), 274 deletions(-) diff --git a/frame/2/gemv/amd/bli_gemv_unf_var2_amd.c b/frame/2/gemv/amd/bli_gemv_unf_var2_amd.c index 8a32c277e6..b80916adc0 100644 --- 
a/frame/2/gemv/amd/bli_gemv_unf_var2_amd.c +++ b/frame/2/gemv/amd/bli_gemv_unf_var2_amd.c @@ -82,7 +82,7 @@ void PASTEMAC(ch,varname) \ ); \ \ /* If alpha == 0, then we are done. */ \ - if ( bli_teq0s( ch, *alpha ) ) return; \ + if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Query the context for the kernel function pointer and fusing factor. */ \ /*axpyf_ker_ft kfp_af = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPYF_KER, cntx );*/ \ @@ -158,7 +158,7 @@ void PASTEMAC(ch,varname) \ conja = bli_extract_conj( transa ); \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ - if ( bli_teq0s( ch, *beta ) ) \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC(ch,setv,BLIS_TAPI_EX_SUF) \ diff --git a/frame/compat/amd/bla_gemv_amd.c b/frame/compat/amd/bla_gemv_amd.c index 7dbb8e4361..5cd523f178 100644 --- a/frame/compat/amd/bla_gemv_amd.c +++ b/frame/compat/amd/bla_gemv_amd.c @@ -116,7 +116,7 @@ void PASTEF77(ch,blasname) \ bli_convert_blas_incv( m_y, (ftype*)y, *incy, y0, incy0 ); \ \ /* If alpha is zero, scale y by beta and return early. */ \ - if ( bli_teq0s( ch, *alpha ) ) \ + if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ PASTEMAC(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ diff --git a/kernels/armsve/1m/bli_dpackm_armsve256_int_8x10.c b/kernels/armsve/1m/bli_dpackm_armsve256_int_8x10.c index 521f27577c..1665b539c5 100644 --- a/kernels/armsve/1m/bli_dpackm_armsve256_int_8x10.c +++ b/kernels/armsve/1m/bli_dpackm_armsve256_int_8x10.c @@ -79,7 +79,7 @@ void bli_dpackm_armsve256_int_8x10 if ( cdim == mr && cdim_bcast == 1 ) { - if ( bli_teq1s( d, *(( double* )kappa) ) ) + if ( bli_deq1( *(( double* )kappa) ) ) { if ( inca == 1 ) // continous memory. 
packA style { @@ -183,9 +183,8 @@ void bli_dpackm_armsve256_int_8x10 } else { - bli_tscal2bbs_mxn + bli_dscal2bbs_mxn ( - d,d,d,d, conja, cdim_, n_, @@ -195,12 +194,11 @@ void bli_dpackm_armsve256_int_8x10 ); } - bli_tset0s_edge + bli_dset0s_edge ( - d, cdim_*cdim_bcast, cdim_max*cdim_bcast, n_, n_max_, - (double*)p, ldp + p, ldp ); } diff --git a/kernels/armsve/1m/bli_dpackm_armsve512_asm_16x10.c b/kernels/armsve/1m/bli_dpackm_armsve512_asm_16x10.c index ed96082a85..5981f392ab 100644 --- a/kernels/armsve/1m/bli_dpackm_armsve512_asm_16x10.c +++ b/kernels/armsve/1m/bli_dpackm_armsve512_asm_16x10.c @@ -66,7 +66,7 @@ void bli_dpackm_armsve512_asm_16x10 const int64_t lda = lda_; const int64_t ldp = ldp_; const bool gs = inca != 1 && lda != 1; - const bool unitk = bli_teq1s( d, *(( double* )kappa) ); + const bool unitk = bli_deq1( *(( double* )kappa) ); // This never would have worked in the first place since GEMM packing used // BLIS_PACKED_ROW_PANELS and BLIS_PACKED_COL_PANELS, but with the removal @@ -566,9 +566,8 @@ void bli_dpackm_armsve512_asm_16x10 } else { - bli_tscal2bbs_mxn + bli_dscal2bbs_mxn ( - d,d,d,d, conja, cdim_, n_, @@ -578,11 +577,10 @@ void bli_dpackm_armsve512_asm_16x10 ); } - bli_tset0s_edge + bli_dset0s_edge ( - d, cdim_*cdim_bcast, cdim_max*cdim_bcast, n_, n_max_, - (double*)p, ldp + p, ldp ); } diff --git a/kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c b/kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c index 51fa48dd5f..4242e4efdf 100644 --- a/kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c +++ b/kernels/armv8a/1m/bli_packm_armv8a_int_d6x8.c @@ -91,7 +91,7 @@ void bli_dpackm_armv8a_int_6x8 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. 
- const bool unitk = bli_teq1s( d, *(( double* )kappa) ); + const bool unitk = bli_deq1( *(( double* )kappa) ); // ------------------------------------------------------------------------- @@ -478,9 +478,8 @@ void bli_dpackm_armv8a_int_6x8 } else { - bli_tscal2bbs_mxn + bli_dscal2bbs_mxn ( - d,d,d,d, conja, cdim0, k0, @@ -490,12 +489,11 @@ void bli_dpackm_armv8a_int_6x8 ); } - bli_tset0s_edge + bli_dset0s_edge ( - d, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - (double*)p, ldp + p, ldp ); } diff --git a/kernels/armv8a/1m/bli_packm_armv8a_int_s8x12.c b/kernels/armv8a/1m/bli_packm_armv8a_int_s8x12.c index 805166327e..df1e6178f4 100644 --- a/kernels/armv8a/1m/bli_packm_armv8a_int_s8x12.c +++ b/kernels/armv8a/1m/bli_packm_armv8a_int_s8x12.c @@ -94,7 +94,7 @@ void bli_spackm_armv8a_int_8x12 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. - const bool unitk = bli_teq1s( s, *(( float* )kappa) ); + const bool unitk = bli_seq1( *(( float* )kappa) ); // ------------------------------------------------------------------------- @@ -617,9 +617,8 @@ void bli_spackm_armv8a_int_8x12 } else { - bli_tscal2bbs_mxn + bli_sscal2bbs_mxn ( - s,s,s,s, conja, cdim0, k0, @@ -629,12 +628,11 @@ void bli_spackm_armv8a_int_8x12 ); } - bli_tset0s_edge + bli_sset0s_edge ( - s, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - (float*)p, ldp + p, ldp ); } diff --git a/kernels/bgq/1/bli_dotv_bgq_int.c b/kernels/bgq/1/bli_dotv_bgq_int.c index c5a0b09013..1774850032 100644 --- a/kernels/bgq/1/bli_dotv_bgq_int.c +++ b/kernels/bgq/1/bli_dotv_bgq_int.c @@ -53,7 +53,7 @@ void bli_ddotv_bgq_int // If the vector lengths are zero, set rho to zero and return. 
if ( bli_zero_dim1( n ) ) { - bli_tset0s( d, *rho ); + PASTEMAC(d,set0s)( *rho ); return; } // If there is anything that would interfere with our use of aligned diff --git a/kernels/bgq/1f/bli_axpyf_bgq_int.c b/kernels/bgq/1f/bli_axpyf_bgq_int.c index f3b92220b5..1bf82380ae 100644 --- a/kernels/bgq/1f/bli_axpyf_bgq_int.c +++ b/kernels/bgq/1f/bli_axpyf_bgq_int.c @@ -110,14 +110,14 @@ void bli_daxpyf_bgq_int double chi6 = *(x + 6*incx); double chi7 = *(x + 7*incx); - bli_tscals( d,d,d, *alpha, chi0 ); - bli_tscals( d,d,d, *alpha, chi1 ); - bli_tscals( d,d,d, *alpha, chi2 ); - bli_tscals( d,d,d, *alpha, chi3 ); - bli_tscals( d,d,d, *alpha, chi4 ); - bli_tscals( d,d,d, *alpha, chi5 ); - bli_tscals( d,d,d, *alpha, chi6 ); - bli_tscals( d,d,d, *alpha, chi7 ); + PASTEMAC(d,d,scals)( *alpha, chi0 ); + PASTEMAC(d,d,scals)( *alpha, chi1 ); + PASTEMAC(d,d,scals)( *alpha, chi2 ); + PASTEMAC(d,d,scals)( *alpha, chi3 ); + PASTEMAC(d,d,scals)( *alpha, chi4 ); + PASTEMAC(d,d,scals)( *alpha, chi5 ); + PASTEMAC(d,d,scals)( *alpha, chi6 ); + PASTEMAC(d,d,scals)( *alpha, chi7 ); vector4double a0v, a1v, a2v, a3v, a4v, a5v, a6v, a7v; vector4double yv; diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_c3x8.c b/kernels/haswell/1m/bli_packm_haswell_asm_c3x8.c index aa67025237..87ddb79579 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_c3x8.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_c3x8.c @@ -91,7 +91,7 @@ void bli_cpackm_haswell_asm_3x8 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. 
- const bool unitk = bli_teq1s( c, *(( scomplex* )kappa) ); + const bool unitk = bli_ceq1( *(( scomplex* )kappa) ); // ------------------------------------------------------------------------- @@ -590,9 +590,8 @@ void bli_cpackm_haswell_asm_3x8 } else { - bli_tscal2bbs_mxn + bli_cscal2bbs_mxn ( - c,c,c,c, conja, cdim0, k0, @@ -602,12 +601,11 @@ void bli_cpackm_haswell_asm_3x8 ); } - bli_tset0s_edge + bli_cset0s_edge ( - c, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - (scomplex*)p, ldp + p, ldp ); } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_d6x8.c b/kernels/haswell/1m/bli_packm_haswell_asm_d6x8.c index 02aba0cb66..ef6d66987f 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_d6x8.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_d6x8.c @@ -91,7 +91,7 @@ void bli_dpackm_haswell_asm_6x8 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. - const bool unitk = bli_teq1s( d, *(( double* )kappa) ); + const bool unitk = bli_deq1( *(( double* )kappa) ); // ------------------------------------------------------------------------- @@ -588,9 +588,8 @@ void bli_dpackm_haswell_asm_6x8 } else { - bli_tscal2bbs_mxn + bli_dscal2bbs_mxn ( - d,d,d,d, conja, cdim0, k0, @@ -600,12 +599,11 @@ void bli_dpackm_haswell_asm_6x8 ); } - bli_tset0s_edge + bli_dset0s_edge ( - d, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - (double*)p, ldp + p, ldp ); } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_s6x16.c b/kernels/haswell/1m/bli_packm_haswell_asm_s6x16.c index 614a51740a..fbab3983db 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_s6x16.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_s6x16.c @@ -91,7 +91,7 @@ void bli_spackm_haswell_asm_6x16 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. 
- const bool unitk = bli_teq1s( s, *(( float* )kappa) ); + const bool unitk = bli_seq1( *(( float* )kappa) ); // ------------------------------------------------------------------------- @@ -789,9 +789,8 @@ void bli_spackm_haswell_asm_6x16 } else { - bli_tscal2bbs_mxn + bli_sscal2bbs_mxn ( - s,s,s,s, conja, cdim0, k0, @@ -801,12 +800,11 @@ void bli_spackm_haswell_asm_6x16 ); } - bli_tset0s_edge + bli_sset0s_edge ( - s, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - (float*)p, ldp + p, ldp ); } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_z3x4.c b/kernels/haswell/1m/bli_packm_haswell_asm_z3x4.c index 6706bacb68..e5d9da4f33 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_z3x4.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_z3x4.c @@ -91,7 +91,7 @@ void bli_zpackm_haswell_asm_3x4 // NOTE: If/when this kernel ever supports scaling by kappa within the // assembly region, this constraint should be lifted. - const bool unitk = bli_teq1s( z, *(( dcomplex* )kappa) ); + const bool unitk = bli_zeq1( *(( dcomplex* )kappa) ); // ------------------------------------------------------------------------- @@ -592,9 +592,8 @@ void bli_zpackm_haswell_asm_3x4 } else { - bli_tscal2bbs_mxn + bli_zscal2bbs_mxn ( - z,z,z,z, conja, cdim0, k0, @@ -604,12 +603,11 @@ void bli_zpackm_haswell_asm_3x4 ); } - bli_tset0s_edge + bli_zset0s_edge ( - z, cdim0*cdim_bcast, cdim_max*cdim_bcast, k0, k0_max, - (dcomplex*)p, ldp + p, ldp ); } diff --git a/kernels/haswell/3/sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c b/kernels/haswell/3/sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c index 9494ce2362..ca15842c4d 100644 --- a/kernels/haswell/3/sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c +++ b/kernels/haswell/3/sup/d6x8/bli_gemmsup_r_haswell_ref_dMx1.c @@ -125,7 +125,7 @@ void PASTEMAC(ch,opname) \ const ctype* bj = b /*[ j*cs_b ]*/ ; \ ctype ab; \ \ - bli_tset0s( ch, ab ); \ + PASTEMAC(ch,set0s)( ab ); \ \ /* Perform a dot product to update the (i,j) element of c. 
*/ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -133,23 +133,23 @@ void PASTEMAC(ch,opname) \ const ctype* aij = &ai[ l*cs_a ]; \ const ctype* bij = &bj[ l*rs_b ]; \ \ - bli_tdots( ch,ch,ch,ch, *aij, *bij, ab ); \ + PASTEMAC(ch,dots)( *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. */ \ - if ( bli_teq1s( ch, *beta ) ) \ + if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ - bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ + PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ } \ - else if ( bli_teq0s( d, *beta ) ) \ + else if ( PASTEMAC(d,eq0)( *beta ) ) \ { \ - bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ + PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ } \ else \ { \ - bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ + PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ } \ } \ } \ diff --git a/kernels/haswell/3/sup/s6x16/bli_gemmsup_r_haswell_ref_sMx1.c b/kernels/haswell/3/sup/s6x16/bli_gemmsup_r_haswell_ref_sMx1.c index a3a646342c..9966283df9 100644 --- a/kernels/haswell/3/sup/s6x16/bli_gemmsup_r_haswell_ref_sMx1.c +++ b/kernels/haswell/3/sup/s6x16/bli_gemmsup_r_haswell_ref_sMx1.c @@ -124,7 +124,7 @@ void PASTEMAC(ch,opname) \ const ctype* bj = b /*[ j*cs_b ]*/ ; \ ctype ab; \ \ - bli_tset0s( ch, ab ); \ + PASTEMAC(ch,set0s)( ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ @@ -132,23 +132,23 @@ void PASTEMAC(ch,opname) \ const ctype* aij = &ai[ l*cs_a ]; \ const ctype* bij = &bj[ l*rs_b ]; \ \ - bli_tdots( ch,ch,ch,ch, *aij, *bij, ab ); \ + PASTEMAC(ch,dots)( *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ - if ( bli_teq1s( ch, *beta ) ) \ + if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ - bli_taxpys( ch,ch,ch,ch, *alpha, ab, *cij ); \ + PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ } \ - else if ( bli_teq0s( d, *beta ) ) \ + else if ( PASTEMAC(d,eq0)( *beta ) ) \ { \ - bli_tscal2s( ch,ch,ch,ch, *alpha, ab, *cij ); \ + PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ } \ else \ { \ - bli_taxpbys( ch,ch,ch,ch,ch, *alpha, ab, *beta, *cij ); \ + PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ } \ } \ } \ diff --git a/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c b/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c index 96b57ee77c..b081ab3d0b 100644 --- a/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c +++ b/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c @@ -544,9 +544,8 @@ void bli_dpackm_knl_asm_24x8 } else { - bli_tscal2bbs_mxn + bli_dscal2bbs_mxn ( - d,d,d,d, BLIS_NO_CONJUGATE, cdim, n, @@ -556,11 +555,10 @@ void bli_dpackm_knl_asm_24x8 ); } - bli_tset0s_edge + bli_dset0s_edge ( - d, cdim*cdim_bcast, cdim_max*cdim_bcast, n, n_max, - (double*)p, ldp + p, ldp ); } diff --git a/kernels/knl/1m/bli_spackm_knl_asm_24x16.c b/kernels/knl/1m/bli_spackm_knl_asm_24x16.c index 7ef428dac2..78b41ae229 100644 --- a/kernels/knl/1m/bli_spackm_knl_asm_24x16.c +++ b/kernels/knl/1m/bli_spackm_knl_asm_24x16.c @@ -561,9 +561,8 @@ void bli_spackm_knl_asm_24x16 } else { - bli_tscal2bbs_mxn + bli_sscal2bbs_mxn ( - s,s,s,s, BLIS_NO_CONJUGATE, cdim, n, @@ -573,11 +572,10 @@ void bli_spackm_knl_asm_24x16 ); } - bli_tset0s_edge + bli_sset0s_edge ( - s, cdim*cdim_bcast, cdim_max*cdim_bcast, n, n_max, - (float*)p, ldp + p, ldp ); } diff --git a/kernels/penryn/1/bli_dotv_penryn_int.c b/kernels/penryn/1/bli_dotv_penryn_int.c index ca12911fca..83f44309f3 100644 --- a/kernels/penryn/1/bli_dotv_penryn_int.c +++ b/kernels/penryn/1/bli_dotv_penryn_int.c @@ -73,7 +73,7 @@ void bli_ddotv_penryn_int // If the vector lengths are zero, set rho to zero and return. 
if ( bli_zero_dim1( n ) ) { - bli_tset0s( d, *rho_cast ); + PASTEMAC(d,set0s)( *rho_cast ); return; } @@ -122,7 +122,7 @@ void bli_ddotv_penryn_int const double* restrict x1 = x_cast; const double* restrict y1 = y_cast; - bli_tset0s( d, rho1 ); + PASTEMAC(d,set0s)( rho1 ); if ( n_pre == 1 ) { @@ -166,5 +166,5 @@ void bli_ddotv_penryn_int } } - bli_tcopys( d,d, rho1, *rho_cast ); + PASTEMAC(d,copys)( rho1, *rho_cast ); } diff --git a/kernels/penryn/1f/bli_axpyf_penryn_int.c b/kernels/penryn/1f/bli_axpyf_penryn_int.c index 859c26e26b..3ac75f424e 100644 --- a/kernels/penryn/1f/bli_axpyf_penryn_int.c +++ b/kernels/penryn/1f/bli_axpyf_penryn_int.c @@ -144,10 +144,10 @@ void bli_daxpyf_penryn_int chi2 = *(x_cast + 2*incx); chi3 = *(x_cast + 3*incx); - bli_tscals( d,d,d, *alpha_cast, chi0 ); - bli_tscals( d,d,d, *alpha_cast, chi1 ); - bli_tscals( d,d,d, *alpha_cast, chi2 ); - bli_tscals( d,d,d, *alpha_cast, chi3 ); + PASTEMAC(d,d,scals)( *alpha_cast, chi0 ); + PASTEMAC(d,d,scals)( *alpha_cast, chi1 ); + PASTEMAC(d,d,scals)( *alpha_cast, chi2 ); + PASTEMAC(d,d,scals)( *alpha_cast, chi3 ); if ( m_pre == 1 ) { diff --git a/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c b/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c index a65740a93a..eab3c0bb0a 100644 --- a/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c +++ b/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c @@ -79,7 +79,7 @@ void bli_ddotaxpyv_penryn_int // If the vector lengths are zero, set rho to zero and return. 
if ( bli_zero_dim1( n ) ) { - bli_tset0s( d, *rho_cast ); + PASTEMAC(d,set0s)( *rho_cast ); return; } @@ -138,7 +138,7 @@ void bli_ddotaxpyv_penryn_int //stepy = 2 * incy; //stepz = 2 * incz; - bli_tset0s( d, rho1c ); + PASTEMAC(d,set0s)( rho1c ); alpha1c = *alpha_cast; diff --git a/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c b/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c index 96c753aa0d..0148d3f924 100644 --- a/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c +++ b/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c @@ -182,15 +182,15 @@ void bli_ddotxaxpyf_penryn_int chi2 = *(x_cast + 2*incx); chi3 = *(x_cast + 3*incx); - bli_tscals( d,d,d, *alpha_cast, chi0 ); - bli_tscals( d,d,d, *alpha_cast, chi1 ); - bli_tscals( d,d,d, *alpha_cast, chi2 ); - bli_tscals( d,d,d, *alpha_cast, chi3 ); - - bli_tset0s( d, rho0 ); - bli_tset0s( d, rho1 ); - bli_tset0s( d, rho2 ); - bli_tset0s( d, rho3 ); + PASTEMAC(d,d,scals)( *alpha_cast, chi0 ); + PASTEMAC(d,d,scals)( *alpha_cast, chi1 ); + PASTEMAC(d,d,scals)( *alpha_cast, chi2 ); + PASTEMAC(d,d,scals)( *alpha_cast, chi3 ); + + PASTEMAC(d,set0s)( rho0 ); + PASTEMAC(d,set0s)( rho1 ); + PASTEMAC(d,set0s)( rho2 ); + PASTEMAC(d,set0s)( rho3 ); if ( m_pre == 1 ) { diff --git a/kernels/penryn/1f/bli_dotxf_penryn_int.c b/kernels/penryn/1f/bli_dotxf_penryn_int.c index 530827ec21..282587b58f 100644 --- a/kernels/penryn/1f/bli_dotxf_penryn_int.c +++ b/kernels/penryn/1f/bli_dotxf_penryn_int.c @@ -157,10 +157,10 @@ void bli_ddotxf_penryn_int const double* restrict x3 = a_cast + 3*lda; const double* restrict y0 = x_cast; - bli_tset0s( d, rho0 ); - bli_tset0s( d, rho1 ); - bli_tset0s( d, rho2 ); - bli_tset0s( d, rho3 ); + PASTEMAC(d,set0s)( rho0 ); + PASTEMAC(d,set0s)( rho1 ); + PASTEMAC(d,set0s)( rho2 ); + PASTEMAC(d,set0s)( rho3 ); if ( m_pre == 1 ) { @@ -269,15 +269,15 @@ void bli_ddotxf_penryn_int } } /* - bli_tscals( d,d,d, *beta_cast, *(y_cast ) ); \ - bli_tscals( d,d,d, *beta_cast, *(y_cast+1) ); \ - bli_tscals( d,d,d, *beta_cast, *(y_cast+2) ); 
\ - bli_tscals( d,d,d, *beta_cast, *(y_cast+3) ); \ - - bli_taxpys( d,d,d,d, *alpha_cast, rho1, *(y_cast ) ); \ - bli_taxpys( d,d,d,d, *alpha_cast, rho2, *(y_cast+1) ); \ - bli_taxpys( d,d,d,d, *alpha_cast, rho3, *(y_cast+2) ); \ - bli_taxpys( d,d,d,d, *alpha_cast, rho4, *(y_cast+3) ); \ + PASTEMAC(d,d,scals)( *beta_cast, *(y_cast ) ); \ + PASTEMAC(d,d,scals)( *beta_cast, *(y_cast+1) ); \ + PASTEMAC(d,d,scals)( *beta_cast, *(y_cast+2) ); \ + PASTEMAC(d,d,scals)( *beta_cast, *(y_cast+3) ); \ + + PASTEMAC(d,d,d,axpys)( *alpha_cast, rho1, *(y_cast ) ); \ + PASTEMAC(d,d,d,axpys)( *alpha_cast, rho2, *(y_cast+1) ); \ + PASTEMAC(d,d,d,axpys)( *alpha_cast, rho3, *(y_cast+2) ); \ + PASTEMAC(d,d,d,axpys)( *alpha_cast, rho4, *(y_cast+3) ); \ */ rho1v.d[0] = rho0; diff --git a/kernels/zen/1/bli_amaxv_zen_int.c b/kernels/zen/1/bli_amaxv_zen_int.c index e18ecdff99..028e4d6ba0 100644 --- a/kernels/zen/1/bli_amaxv_zen_int.c +++ b/kernels/zen/1/bli_amaxv_zen_int.c @@ -123,17 +123,17 @@ void bli_samaxv_zen_int the behavior of netlib BLAS's i?amax() routines. */ if ( bli_zero_dim1( n ) ) { - bli_tcopys( i,i, *zero_i, *index ); + PASTEMAC(i,copys)( *zero_i, *index ); return; } /* Initialize the index of the maximum absolute value to zero. */ - bli_tcopys( i,i, *zero_i, i_max_l ); + PASTEMAC(i,copys)( *zero_i, i_max_l ); /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ - bli_tcopys( s,s, *minus_one, abs_chi1_max ); + PASTEMAC(s,copys)( *minus_one, abs_chi1_max ); // For non-unit strides, or very small vector lengths, compute with // scalar code. @@ -290,17 +290,17 @@ void bli_damaxv_zen_int the behavior of netlib BLAS's i?amax() routines. */ if ( bli_zero_dim1( n ) ) { - bli_tcopys( i,i, *zero_i, *index ); + PASTEMAC(i,copys)( *zero_i, *index ); return; } /* Initialize the index of the maximum absolute value to zero. 
*/ \ - bli_tcopys( i,i, *zero_i, i_max_l ); + PASTEMAC(i,copys)( *zero_i, i_max_l ); /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ - bli_tcopys( d,d, *minus_one, abs_chi1_max ); + PASTEMAC(d,copys)( *minus_one, abs_chi1_max ); // For non-unit strides, or very small vector lengths, compute with // scalar code. @@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \ dim_t i; \ \ /* Initialize the index of the maximum absolute value to zero. */ \ - bli_tcopys( i,i, zero_i, *index ); \ + PASTEMAC(i,copys)( zero_i, *index ); \ \ /* If the vector length is zero, return early. This directly emulates the behavior of netlib BLAS's i?amax() routines. */ \ @@ -455,30 +455,30 @@ void PASTEMAC(ch,varname) \ /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ \ - bli_tcopys( chr, *minus_one, abs_chi1_max ); \ + PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \ \ if ( incx == 1 ) \ { \ for ( i = 0; i < n; ++i ) \ { \ /* Get the real and imaginary components of chi1. */ \ - bli_tgets( ch,chr, x[i], chi1_r, chi1_i ); \ + PASTEMAC(ch,chr,gets)( x[i], chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ - tabval2s( chr, chi1_r, chi1_r ); \ - tabval2s( chr, chi1_i, chi1_i ); \ + PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ + PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ - bli_tset0s( chr, abs_chi1 ); \ - bli_tadds( chr, chi1_r, abs_chi1 ); \ - bli_tadds( chr, chi1_i, abs_chi1 ); \ + PASTEMAC(chr,set0s)( abs_chi1 ); \ + PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ + PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. 
This behavior mimics that of LAPACK's ?lange(). */ \ - if ( abs_chi1_max < abs_chi1 || PASTEMAC(chr,isnan)( abs_chi1 ) ) \ + if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ *index = i; \ @@ -492,23 +492,23 @@ void PASTEMAC(ch,varname) \ ctype* chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. */ \ - bli_tgets( ch,chr, *chi1, chi1_r, chi1_i ); \ + PASTEMAC(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ - tabval2s( chr, chi1_r, chi1_r ); \ - tabval2s( chr, chi1_i, chi1_i ); \ + PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ + PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ - bli_tset0s( chr, abs_chi1 ); \ - bli_tadds( chr, chi1_r, abs_chi1 ); \ - bli_tadds( chr, chi1_i, abs_chi1 ); \ + PASTEMAC(chr,set0s)( abs_chi1 ); \ + PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ + PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ - if ( abs_chi1_max < abs_chi1 || PASTEMAC(chr,isnan)( abs_chi1 ) ) \ + if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ *index = i; \ diff --git a/kernels/zen/1/bli_axpyv_zen_int.c b/kernels/zen/1/bli_axpyv_zen_int.c index 019cef60ee..6212f1745a 100644 --- a/kernels/zen/1/bli_axpyv_zen_int.c +++ b/kernels/zen/1/bli_axpyv_zen_int.c @@ -81,7 +81,7 @@ void bli_saxpyv_zen_int v8sf_t y0v, y1v, y2v, y3v; // If the vector dimension is zero, or if alpha is zero, return early. 
- if ( bli_zero_dim1( n ) || bli_teq0s( s, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. @@ -186,7 +186,7 @@ void bli_daxpyv_zen_int v4df_t y0v, y1v, y2v, y3v; // If the vector dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim1( n ) || bli_teq0s( d, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. diff --git a/kernels/zen/1/bli_axpyv_zen_int10.c b/kernels/zen/1/bli_axpyv_zen_int10.c index 6060b63653..96b8e5f705 100644 --- a/kernels/zen/1/bli_axpyv_zen_int10.c +++ b/kernels/zen/1/bli_axpyv_zen_int10.c @@ -79,7 +79,7 @@ void bli_saxpyv_zen_int10 __m256 zv[10]; // If the vector dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim1( n ) || bli_teq0s( s, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return; // Initialize local pointers. const float* restrict xp = x; @@ -286,7 +286,7 @@ void bli_daxpyv_zen_int10 __m256d zv[10]; // If the vector dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim1( n ) || bli_teq0s( d, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return; // Initialize local pointers. const double* restrict xp = x; diff --git a/kernels/zen/1/bli_dotv_zen_int.c b/kernels/zen/1/bli_dotv_zen_int.c index 90ee38842b..866817b5d3 100644 --- a/kernels/zen/1/bli_dotv_zen_int.c +++ b/kernels/zen/1/bli_dotv_zen_int.c @@ -87,7 +87,7 @@ void bli_sdotv_zen_int // If the vector dimension is zero, set rho to zero and return early. 
if ( bli_zero_dim1( n ) ) { - bli_tset0s( s, *rho ); + PASTEMAC(s,set0s)( *rho ); return; } @@ -110,7 +110,7 @@ void bli_sdotv_zen_int const float* restrict yp = y; // Initialize the local scalar rho1 to zero. - bli_tset0s( s, rho_l ); + PASTEMAC(s,set0s)( rho_l ); // Initialize the unrolled iterations' rho vectors to zero. rho0v.v = _mm256_setzero_ps(); @@ -173,7 +173,7 @@ void bli_sdotv_zen_int } // Copy the final result into the output variable. - bli_tcopys( s,s, rho_l, *rho ); + PASTEMAC(s,copys)( rho_l, *rho ); } // ----------------------------------------------------------------------------- @@ -211,7 +211,7 @@ void bli_ddotv_zen_int // If the vector dimension is zero, set rho to zero and return early. if ( bli_zero_dim1( n ) ) { - bli_tset0s( d, *rho ); + PASTEMAC(d,set0s)( *rho ); return; } @@ -234,7 +234,7 @@ void bli_ddotv_zen_int const double* restrict yp = y; // Initialize the local scalar rho1 to zero. - bli_tset0s( d, rho_l ); + PASTEMAC(d,set0s)( rho_l ); // Initialize the unrolled iterations' rho vectors to zero. rho0v.v = _mm256_setzero_pd(); @@ -296,6 +296,6 @@ void bli_ddotv_zen_int } // Copy the final result into the output variable. - bli_tcopys( d,d, rho_l, *rho ); + PASTEMAC(d,copys)( rho_l, *rho ); } diff --git a/kernels/zen/1/bli_dotv_zen_int10.c b/kernels/zen/1/bli_dotv_zen_int10.c index c5211a54fc..9d8efdec30 100644 --- a/kernels/zen/1/bli_dotv_zen_int10.c +++ b/kernels/zen/1/bli_dotv_zen_int10.c @@ -83,7 +83,7 @@ void bli_sdotv_zen_int10 // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) ) { - bli_tset0s( s, *rho ); + PASTEMAC(s,set0s)( *rho ); return; } @@ -91,7 +91,7 @@ void bli_sdotv_zen_int10 const float* restrict xp = x; const float* restrict yp = y; - bli_tset0s( s, rho_l ); + PASTEMAC(s,set0s)( rho_l ); if ( incx == 1 && incy == 1 ) { @@ -242,7 +242,7 @@ void bli_sdotv_zen_int10 } // Copy the final result into the output variable. 
- bli_tcopys( s,s, rho_l, *rho ); + PASTEMAC(s,copys)( rho_l, *rho ); } // ----------------------------------------------------------------------------- @@ -275,7 +275,7 @@ void bli_ddotv_zen_int10 // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) ) { - bli_tset0s( d, *rho ); + PASTEMAC(d,set0s)( *rho ); return; } @@ -283,7 +283,7 @@ void bli_ddotv_zen_int10 const double* restrict xp = x; const double* restrict yp = y; - bli_tset0s( d, rho_l ); + PASTEMAC(d,set0s)( rho_l ); if ( incx == 1 && incy == 1 ) { @@ -455,6 +455,6 @@ void bli_ddotv_zen_int10 } // Copy the final result into the output variable. - bli_tcopys( d,d, rho_l, *rho ); + PASTEMAC(d,copys)( rho_l, *rho ); } diff --git a/kernels/zen/1/bli_dotxv_zen_int.c b/kernels/zen/1/bli_dotxv_zen_int.c index 0bb236c31d..3e41be8797 100644 --- a/kernels/zen/1/bli_dotxv_zen_int.c +++ b/kernels/zen/1/bli_dotxv_zen_int.c @@ -90,17 +90,17 @@ void bli_sdotxv_zen_int // If beta is zero, initialize rho1 to zero instead of scaling // rho by beta (in case rho contains NaN or Inf). - if ( bli_teq0s( s, *beta ) ) + if ( PASTEMAC(s,eq0)( *beta ) ) { - bli_tset0s( s, *rho ); + PASTEMAC(s,set0s)( *rho ); } else { - bli_tscals( s,s,s, *beta, *rho ); + PASTEMAC(s,scals)( *beta, *rho ); } // If the vector dimension is zero, output rho and return early. - if ( bli_zero_dim1( n ) || bli_teq0s( s, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. @@ -181,7 +181,7 @@ void bli_sdotxv_zen_int } // Accumulate the final result into the output variable. 
- bli_taxpys( s,s,s,s, *alpha, rho_l, *rho ); + PASTEMAC(s,axpys)( *alpha, rho_l, *rho ); } // ----------------------------------------------------------------------------- @@ -222,17 +222,17 @@ void bli_ddotxv_zen_int // If beta is zero, initialize rho1 to zero instead of scaling // rho by beta (in case rho contains NaN or Inf). - if ( bli_teq0s( d, *beta ) ) + if ( PASTEMAC(d,eq0)( *beta ) ) { - bli_tset0s( d, *rho ); + PASTEMAC(d,set0s)( *rho ); } else { - bli_tscals( d,d,d, *beta, *rho ); + PASTEMAC(d,scals)( *beta, *rho ); } // If the vector dimension is zero, output rho and return early. - if ( bli_zero_dim1( n ) || bli_teq0s( d, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. @@ -312,6 +312,6 @@ void bli_ddotxv_zen_int } // Accumulate the final result into the output variable. - bli_taxpys( d,d,d,d, *alpha, rho_l, *rho ); + PASTEMAC(d,axpys)( *alpha, rho_l, *rho ); } diff --git a/kernels/zen/1/bli_scalv_zen_int.c b/kernels/zen/1/bli_scalv_zen_int.c index ac9ce5e6bf..4dd8b0b5e3 100644 --- a/kernels/zen/1/bli_scalv_zen_int.c +++ b/kernels/zen/1/bli_scalv_zen_int.c @@ -78,10 +78,10 @@ void bli_sscalv_zen_int v8sf_t x0v, x1v, x2v, x3v; // If the vector dimension is zero, or if alpha is unit, return early. - if ( bli_zero_dim1( n ) || bli_teq1s( s, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(s,eq1)( *alpha ) ) return; // If alpha is zero, use setv (in case y contains NaN or Inf). - if ( bli_teq0s( s, *alpha ) ) + if ( PASTEMAC(s,eq0)( *alpha ) ) { void* zero = bli_s0; setv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_FLOAT, BLIS_SETV_KER, cntx ); @@ -178,10 +178,10 @@ void bli_dscalv_zen_int v4df_t x0v, x1v, x2v, x3v; // If the vector dimension is zero, or if alpha is unit, return early. 
- if ( bli_zero_dim1( n ) || bli_teq1s( d, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(d,eq1)( *alpha ) ) return; // If alpha is zero, use setv (in case y contains NaN or Inf). - if ( bli_teq0s( d, *alpha ) ) + if ( PASTEMAC(d,eq0)( *alpha ) ) { void* zero = bli_d0; setv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_SETV_KER, cntx ); diff --git a/kernels/zen/1/bli_scalv_zen_int10.c b/kernels/zen/1/bli_scalv_zen_int10.c index 9f5fb2587c..06099b8e05 100644 --- a/kernels/zen/1/bli_scalv_zen_int10.c +++ b/kernels/zen/1/bli_scalv_zen_int10.c @@ -78,10 +78,10 @@ void bli_sscalv_zen_int10 __m256 zv[10]; // If the vector dimension is zero, or if alpha is unit, return early. - if ( bli_zero_dim1( n ) || bli_teq1s( s, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(s,eq1)( *alpha ) ) return; // If alpha is zero, use setv. - if ( bli_teq0s( s, *alpha ) ) + if ( PASTEMAC(s,eq0)( *alpha ) ) { if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); @@ -274,10 +274,10 @@ void bli_dscalv_zen_int10 __m256d zv[10]; // If the vector dimension is zero, or if alpha is unit, return early. - if ( bli_zero_dim1( n ) || bli_teq1s( d, *alpha ) ) return; + if ( bli_zero_dim1( n ) || PASTEMAC(d,eq1)( *alpha ) ) return; // If alpha is zero, use setv. 
- if ( bli_teq0s( d, *alpha ) ) + if ( PASTEMAC(d,eq0)( *alpha ) ) { if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); diff --git a/kernels/zen/1/bli_swapv_zen_int8.c b/kernels/zen/1/bli_swapv_zen_int8.c index 2d7a380065..09ed1cf83e 100644 --- a/kernels/zen/1/bli_swapv_zen_int8.c +++ b/kernels/zen/1/bli_swapv_zen_int8.c @@ -181,14 +181,14 @@ void bli_sswapv_zen_int8 for ( ; (i + 0) < n; i += 1 ) { - bli_tswaps( s,s, x[i], y[i] ); + PASTEMAC(s,swaps)( x[i], y[i] ); } } else { for ( i = 0; i < n; ++i ) { - bli_tswaps( s,s, (*xp), (*yp) ); + PASTEMAC(s,swaps)( (*xp), (*yp) ); xp += incx; yp += incy; @@ -326,14 +326,14 @@ void bli_dswapv_zen_int8 for ( ; (i + 0) < n; i += 1 ) { - bli_tswaps( d,d, x[i], y[i] ); + PASTEMAC(d,swaps)( x[i], y[i] ); } } else { for ( i = 0; i < n; ++i ) { - bli_tswaps( d,d, (*xp), (*yp) ); + PASTEMAC(d,swaps)( (*xp), (*yp) ); xp += incx; yp += incy; diff --git a/kernels/zen/1f/bli_axpyf_zen_int_4.c b/kernels/zen/1f/bli_axpyf_zen_int_4.c index 72055fc702..4e50b4f1ca 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_4.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_4.c @@ -79,7 +79,7 @@ void bli_caxpyf_zen_int_4 } // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_teq0s( c, *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_ceq0( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. 
@@ -96,8 +96,8 @@ void bli_caxpyf_zen_int_4 scomplex* restrict y1 = y + (0 )*incy; scomplex alpha_chi1; - bli_tcopycjs( c,c, conjx, *chi1, alpha_chi1 ); - bli_tscals( c,c,c, *alpha, alpha_chi1 ); + bli_ccopycjs( conjx, *chi1, alpha_chi1 ); + bli_cscals( *alpha, alpha_chi1 ); f ( @@ -129,17 +129,17 @@ void bli_caxpyf_zen_int_4 const scomplex* restrict pchi2 = x + 2*incx ; const scomplex* restrict pchi3 = x + 3*incx ; - bli_tcopycjs( c,c, conjx, *pchi0, chi0 ); - bli_tcopycjs( c,c, conjx, *pchi1, chi1 ); - bli_tcopycjs( c,c, conjx, *pchi2, chi2 ); - bli_tcopycjs( c,c, conjx, *pchi3, chi3 ); + bli_ccopycjs( conjx, *pchi0, chi0 ); + bli_ccopycjs( conjx, *pchi1, chi1 ); + bli_ccopycjs( conjx, *pchi2, chi2 ); + bli_ccopycjs( conjx, *pchi3, chi3 ); } // Scale each chi scalar by alpha. - bli_tscals( c,c,c, *alpha, chi0 ); - bli_tscals( c,c,c, *alpha, chi1 ); - bli_tscals( c,c,c, *alpha, chi2 ); - bli_tscals( c,c,c, *alpha, chi3 ); + bli_cscals( *alpha, chi0 ); + bli_cscals( *alpha, chi1 ); + bli_cscals( *alpha, chi2 ); + bli_cscals( *alpha, chi3 ); lda *= 2; incx *= 2; diff --git a/kernels/zen/1f/bli_axpyf_zen_int_5.c b/kernels/zen/1f/bli_axpyf_zen_int_5.c index 9ef7f34639..78477d3fa1 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_5.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_5.c @@ -99,7 +99,7 @@ void bli_saxpyf_zen_int_5 float chi4; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_teq0s( s, *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_seq0( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. 
@@ -116,8 +116,8 @@ void bli_saxpyf_zen_int_5 float* restrict y1 = y + (0 )*incy; float alpha_chi1; - bli_tcopycjs( s,s, conjx, *chi1, alpha_chi1 ); - bli_tscals( s,s,s, *alpha, alpha_chi1 ); + bli_scopycjs( conjx, *chi1, alpha_chi1 ); + bli_sscals( *alpha, alpha_chi1 ); f ( @@ -150,11 +150,11 @@ void bli_saxpyf_zen_int_5 // Scale each chi scalar by alpha. - bli_tscals( s,s,s, *alpha, chi0 ); - bli_tscals( s,s,s, *alpha, chi1 ); - bli_tscals( s,s,s, *alpha, chi2 ); - bli_tscals( s,s,s, *alpha, chi3 ); - bli_tscals( s,s,s, *alpha, chi4 ); + bli_sscals( *alpha, chi0 ); + bli_sscals( *alpha, chi1 ); + bli_sscals( *alpha, chi2 ); + bli_sscals( *alpha, chi3 ); + bli_sscals( *alpha, chi4 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_ss( &chi0 ); @@ -348,7 +348,7 @@ void bli_daxpyf_zen_int_5 double chi4; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_teq0s( d, *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_deq0( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -365,8 +365,8 @@ void bli_daxpyf_zen_int_5 double* restrict y1 = y + (0 )*incy; double alpha_chi1; - bli_tcopycjs( d,d, conjx, *chi1, alpha_chi1 ); - bli_tscals( d,d,d, *alpha, alpha_chi1 ); + bli_dcopycjs( conjx, *chi1, alpha_chi1 ); + bli_dscals( *alpha, alpha_chi1 ); f ( @@ -399,11 +399,11 @@ void bli_daxpyf_zen_int_5 // Scale each chi scalar by alpha. - bli_tscals( d,d,d, *alpha, chi0 ); - bli_tscals( d,d,d, *alpha, chi1 ); - bli_tscals( d,d,d, *alpha, chi2 ); - bli_tscals( d,d,d, *alpha, chi3 ); - bli_tscals( d,d,d, *alpha, chi4 ); + bli_dscals( *alpha, chi0 ); + bli_dscals( *alpha, chi1 ); + bli_dscals( *alpha, chi2 ); + bli_dscals( *alpha, chi3 ); + bli_dscals( *alpha, chi4 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. 
chi0v.v = _mm256_broadcast_sd( &chi0 ); @@ -597,10 +597,10 @@ void bli_daxpyf_zen_int_16x2 v2df_t a40v, a41v; - v2df_t y4v; + v2df_t y4v; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_teq0s( d, *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_deq0( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -617,8 +617,8 @@ void bli_daxpyf_zen_int_16x2 double* restrict y1 = y + (0 )*incy; double alpha_chi1; - bli_tcopycjs( d,d, conjx, *chi1, alpha_chi1 ); - bli_tscals( d,d,d, *alpha, alpha_chi1 ); + bli_dcopycjs( conjx, *chi1, alpha_chi1 ); + bli_dscals( *alpha, alpha_chi1 ); f ( @@ -646,8 +646,8 @@ void bli_daxpyf_zen_int_16x2 // Scale each chi scalar by alpha. - bli_tscals( d,d,d, *alpha, chi0 ); - bli_tscals( d,d,d, *alpha, chi1 ); + bli_dscals( *alpha, chi0 ); + bli_dscals( *alpha, chi1 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_sd( &chi0 ); @@ -888,7 +888,7 @@ void bli_daxpyf_zen_int_16x4 v2df_t a40v, a41v, a42v, a43v; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_teq0s( d, *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || bli_deq0( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -905,8 +905,8 @@ void bli_daxpyf_zen_int_16x4 double* restrict y1 = y + (0 )*incy; double alpha_chi1; - bli_tcopycjs( d,d, conjx, *chi1, alpha_chi1 ); - bli_tscals( d,d,d, *alpha, alpha_chi1 ); + bli_dcopycjs( conjx, *chi1, alpha_chi1 ); + bli_dscals( *alpha, alpha_chi1 ); f ( @@ -937,10 +937,10 @@ void bli_daxpyf_zen_int_16x4 chi3 = *( x + 3*incx ); // Scale each chi scalar by alpha. 
- bli_tscals( d,d,d, *alpha, chi0 ); - bli_tscals( d,d,d, *alpha, chi1 ); - bli_tscals( d,d,d, *alpha, chi2 ); - bli_tscals( d,d,d, *alpha, chi3 ); + bli_dscals( *alpha, chi0 ); + bli_dscals( *alpha, chi1 ); + bli_dscals( *alpha, chi2 ); + bli_dscals( *alpha, chi3 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_sd( &chi0 ); diff --git a/kernels/zen/1f/bli_axpyf_zen_int_8.c b/kernels/zen/1f/bli_axpyf_zen_int_8.c index 80786f7710..d495ad4acb 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_8.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_8.c @@ -92,7 +92,7 @@ void bli_saxpyf_zen_int_8 float chi4, chi5, chi6, chi7; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_teq0s( s, *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || PASTEMAC(s,eq0)( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -107,8 +107,8 @@ void bli_saxpyf_zen_int_8 float* restrict y1 = y + (0 )*incy; float alpha_chi1; - bli_tcopycjs( s,s, conjx, *chi1, alpha_chi1 ); - bli_tscals( s,s,s, *alpha, alpha_chi1 ); + PASTEMAC(s,copycjs)( conjx, *chi1, alpha_chi1 ); + PASTEMAC(s,scals)( *alpha, alpha_chi1 ); f ( @@ -160,14 +160,14 @@ void bli_saxpyf_zen_int_8 chi7 = *( x + 7*incx ); // Scale each chi scalar by alpha. - bli_tscals( s,s,s, *alpha, chi0 ); - bli_tscals( s,s,s, *alpha, chi1 ); - bli_tscals( s,s,s, *alpha, chi2 ); - bli_tscals( s,s,s, *alpha, chi3 ); - bli_tscals( s,s,s, *alpha, chi4 ); - bli_tscals( s,s,s, *alpha, chi5 ); - bli_tscals( s,s,s, *alpha, chi6 ); - bli_tscals( s,s,s, *alpha, chi7 ); + PASTEMAC(s,scals)( *alpha, chi0 ); + PASTEMAC(s,scals)( *alpha, chi1 ); + PASTEMAC(s,scals)( *alpha, chi2 ); + PASTEMAC(s,scals)( *alpha, chi3 ); + PASTEMAC(s,scals)( *alpha, chi4 ); + PASTEMAC(s,scals)( *alpha, chi5 ); + PASTEMAC(s,scals)( *alpha, chi6 ); + PASTEMAC(s,scals)( *alpha, chi7 ); // Broadcast the (alpha*chi?) 
scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_ss( &chi0 ); @@ -295,7 +295,7 @@ void bli_daxpyf_zen_int_8 double chi4, chi5, chi6, chi7; // If either dimension is zero, or if alpha is zero, return early. - if ( bli_zero_dim2( m, b_n ) || bli_teq0s( d, *alpha ) ) return; + if ( bli_zero_dim2( m, b_n ) || PASTEMAC(d,eq0)( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. @@ -310,8 +310,8 @@ void bli_daxpyf_zen_int_8 double* restrict y1 = y + (0 )*incy; double alpha_chi1; - bli_tcopycjs( d,d, conjx, *chi1, alpha_chi1 ); - bli_tscals( d,d,d, *alpha, alpha_chi1 ); + PASTEMAC(d,copycjs)( conjx, *chi1, alpha_chi1 ); + PASTEMAC(d,scals)( *alpha, alpha_chi1 ); f ( @@ -363,14 +363,14 @@ void bli_daxpyf_zen_int_8 chi7 = *( x + 7*incx ); // Scale each chi scalar by alpha. - bli_tscals( d,d,d, *alpha, chi0 ); - bli_tscals( d,d,d, *alpha, chi1 ); - bli_tscals( d,d,d, *alpha, chi2 ); - bli_tscals( d,d,d, *alpha, chi3 ); - bli_tscals( d,d,d, *alpha, chi4 ); - bli_tscals( d,d,d, *alpha, chi5 ); - bli_tscals( d,d,d, *alpha, chi6 ); - bli_tscals( d,d,d, *alpha, chi7 ); + PASTEMAC(d,scals)( *alpha, chi0 ); + PASTEMAC(d,scals)( *alpha, chi1 ); + PASTEMAC(d,scals)( *alpha, chi2 ); + PASTEMAC(d,scals)( *alpha, chi3 ); + PASTEMAC(d,scals)( *alpha, chi4 ); + PASTEMAC(d,scals)( *alpha, chi5 ); + PASTEMAC(d,scals)( *alpha, chi6 ); + PASTEMAC(d,scals)( *alpha, chi7 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. chi0v.v = _mm256_broadcast_sd( &chi0 ); diff --git a/kernels/zen/1f/bli_dotxf_zen_int_8.c b/kernels/zen/1f/bli_dotxf_zen_int_8.c index 1077d10837..db62c3c592 100644 --- a/kernels/zen/1f/bli_dotxf_zen_int_8.c +++ b/kernels/zen/1f/bli_dotxf_zen_int_8.c @@ -82,7 +82,7 @@ void bli_sdotxf_zen_int_8 // If the m dimension is zero, or if alpha is zero, the computation // simplifies to updating y. 
- if ( bli_zero_dim1( m ) || bli_teq0s( s, *alpha ) ) + if ( bli_zero_dim1( m ) || PASTEMAC(s,eq0)( *alpha ) ) { scalv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_FLOAT, BLIS_SCALV_KER, cntx ); @@ -404,7 +404,7 @@ void bli_sdotxf_zen_int_8 // We know at this point that alpha is nonzero; however, beta may still // be zero. If beta is indeed zero, we must overwrite y rather than scale // by beta (in case y contains NaN or Inf). - if ( bli_teq0s( s, *beta ) ) + if ( PASTEMAC(s,eq0)( *beta ) ) { // Apply alpha to the accumulated dot product in rho: // y := alpha * rho @@ -478,7 +478,7 @@ void bli_ddotxf_zen_int_8 // If the m dimension is zero, or if alpha is zero, the computation // simplifies to updating y. - if ( bli_zero_dim1( m ) || bli_teq0s( d, *alpha ) ) + if ( bli_zero_dim1( m ) || PASTEMAC(d,eq0)( *alpha ) ) { scalv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_SCALV_KER, cntx ); @@ -791,7 +791,7 @@ void bli_ddotxf_zen_int_8 // We know at this point that alpha is nonzero; however, beta may still // be zero. If beta is indeed zero, we must overwrite y rather than scale // by beta (in case y contains NaN or Inf). - if ( bli_teq0s( d, *beta ) ) + if ( PASTEMAC(d,eq0)( *beta ) ) { // Apply alpha to the accumulated dot product in rho: // y := alpha * rho diff --git a/kernels/zen/3/bli_gemmt_small.c b/kernels/zen/3/bli_gemmt_small.c index 546ba7906a..f2fd88de7b 100644 --- a/kernels/zen/3/bli_gemmt_small.c +++ b/kernels/zen/3/bli_gemmt_small.c @@ -1586,7 +1586,7 @@ static err_t bli_sgemmt_small } //copy/compute sryk values back to C using SIMD - if ( bli_teq0s( s, *beta_cast ) ) + if ( bli_seq0( *beta_cast ) ) {//just copy in case of beta = 0 dim_t _i, _j, k, _l; if(bli_obj_is_lower(c)) // c is lower @@ -1603,7 +1603,7 @@ static err_t bli_sgemmt_small } while (_i < M ) { - bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), + bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } @@ -1616,7 +1616,7 @@ static err_t bli_sgemmt_small k = (k <= M) ? 
k : M; for ( _i = _j; _i < k; ++_i ) { - bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), + bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } k = (M - _i) >> 3; @@ -1631,7 +1631,7 @@ static err_t bli_sgemmt_small } while (_i < M ) { - bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), + bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } @@ -1654,7 +1654,7 @@ static err_t bli_sgemmt_small } while (_i <= _j ) { - bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), + bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; } @@ -1681,7 +1681,7 @@ static err_t bli_sgemmt_small } while (_i < M ) { - bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), + bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; @@ -1695,7 +1695,7 @@ static err_t bli_sgemmt_small k = (k <= M) ? k : M; for ( _i = _j; _i < k; ++_i ) { - bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), + bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -1713,7 +1713,7 @@ static err_t bli_sgemmt_small } while (_i < M ) { - bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), + bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; @@ -1739,7 +1739,7 @@ static err_t bli_sgemmt_small } while (_i <= _j ) { - bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), + bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; @@ -3156,7 +3156,7 @@ static err_t bli_dgemmt_small } //copy/compute sryk values back to C using SIMD - if ( bli_teq0s( s, *beta_cast ) ) + if ( bli_seq0( *beta_cast ) ) {//just copy for beta = 0 dim_t _i, _j, k, _l; if(bli_obj_is_lower(c)) //c is lower @@ -3173,7 +3173,7 @@ static err_t bli_dgemmt_small } while (_i < M ) { - bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), + bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } @@ -3186,7 +3186,7 @@ static err_t 
bli_dgemmt_small k = (k <= M) ? k : M; for ( _i = _j; _i < k; ++_i ) { - bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), + bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } k = (M - _i) >> 2; @@ -3201,7 +3201,7 @@ static err_t bli_dgemmt_small } while (_i < M ) { - bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), + bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } @@ -3224,7 +3224,7 @@ static err_t bli_dgemmt_small } while (_i <= _j ) { - bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), + bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; } @@ -3251,7 +3251,7 @@ static err_t bli_dgemmt_small } while (_i < M ) { - bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), + bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; @@ -3265,7 +3265,7 @@ static err_t bli_dgemmt_small k = (k <= M) ? k : M; for ( _i = _j; _i < k; ++_i ) { - bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), + bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -3283,7 +3283,7 @@ static err_t bli_dgemmt_small } while (_i < M ) { - bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), + bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; @@ -3309,7 +3309,7 @@ static err_t bli_dgemmt_small } while (_i <= _j ) { - bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), + bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; @@ -3717,7 +3717,7 @@ static err_t bli_sgemmt_small_atbn } //copy/compute sryk values back to C - if ( bli_teq0s( s, *beta_cast ) ) //when beta is 0, just copy result to C + if ( bli_seq0( *beta_cast ) ) //when beta is 0, just copy result to C { dim_t _i, _j; if(bli_obj_is_lower(c)) //c is lower @@ -3726,7 +3726,7 @@ static err_t bli_sgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { - bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), 
+ bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } @@ -3736,7 +3736,7 @@ static err_t bli_sgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { - bli_tcopys( s,s, *(C + _i*rsc + _j*ldc), + bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } @@ -3750,7 +3750,7 @@ static err_t bli_sgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { - bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), + bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -3761,7 +3761,7 @@ static err_t bli_sgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { - bli_txpbys( s,s,s,s, *(C + _i*rsc + _j*ldc), + bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -4149,7 +4149,7 @@ static err_t bli_dgemmt_small_atbn } //copy/compute sryk values back to C - if ( bli_teq0s( s, *beta_cast ) ) //when beta is 0, just copy result to C + if ( bli_seq0( *beta_cast ) ) //when beta is 0, just copy result to C { dim_t _i, _j; if(bli_obj_is_lower(c)) //c is lower @@ -4158,7 +4158,7 @@ static err_t bli_dgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { - bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), + bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } @@ -4168,7 +4168,7 @@ static err_t bli_dgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { - bli_tcopys( d,d, *(C + _i*rsc + _j*ldc), + bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } @@ -4182,7 +4182,7 @@ static err_t bli_dgemmt_small_atbn for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { - bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), + bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } @@ -4193,7 +4193,7 @@ static err_t bli_dgemmt_small_atbn for 
( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { - bli_txpbys( d,d,d,d, *(C + _i*rsc + _j*ldc), + bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } diff --git a/vendor/testcpp/test_sdsdot.cc b/vendor/testcpp/test_sdsdot.cc index 230a2fc4bb..c903c97d33 100644 --- a/vendor/testcpp/test_sdsdot.cc +++ b/vendor/testcpp/test_sdsdot.cc @@ -45,7 +45,7 @@ using namespace std; /* * Test application assumes matrices to be column major, non-transposed */ - + #if 0 template< typename T > void ref_sdsot(int64_t n, @@ -61,7 +61,7 @@ void ref_sdsot(int64_t n, obj_t obj_res; obj_t obj_alpha; num_t dt; - + if(is_same>::value) dt = BLIS_SCOMPLEX; else if(is_same>::value) @@ -72,10 +72,10 @@ void ref_sdsot(int64_t n, bli_obj_create_with_attached_buffer( dt, 1, 1, &alpha, 1,1,&obj_alpha ); bli_obj_create_with_attached_buffer( dt, 1, 1, res_ref, 1, 1,&obj_res ); - bli_tdots( d,d,d,d, &obj_x, + bli_ddots( &obj_x, &obj_y, &obj_res ); - + } #endif @@ -113,7 +113,7 @@ void test_sdsdot() printf("Dot product = %E \n", res); #endif - //ref_sdsot(n, aplha, X, Y , &res_ref ); + //ref_sdsot(n, aplha, X, Y , &res_ref ); #ifdef PRINT printf("Ref Dot product %E \n", res_ref); From f1534a78d7d83b9b90644bbb823f23ce991c076f Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 22 Jan 2025 11:59:15 -0600 Subject: [PATCH 13/19] Add compatibility layer for old code such as optimized kernels. 
--- frame/include/level0/bli_teqs.h | 17 +++++++++++ frame/include/level0/bli_tscal2s.h | 46 ++++++++++++++++++++++++++++++ frame/include/level0/bli_tsets.h | 7 +++++ 3 files changed, 70 insertions(+) diff --git a/frame/include/level0/bli_teqs.h b/frame/include/level0/bli_teqs.h index f915d1a19d..d474bffea7 100644 --- a/frame/include/level0/bli_teqs.h +++ b/frame/include/level0/bli_teqs.h @@ -148,6 +148,23 @@ // -- Higher-level static functions -------------------------------------------- +// -- Legacy macros ------------------------------------------------------------ + +#define bli_seqs( x, y ) bli_teqs( s,s,s, x, y ) +#define bli_deqs( x, y ) bli_teqs( d,d,d, x, y ) +#define bli_ceqs( x, y ) bli_teqs( c,c,c, x, y ) +#define bli_zeqs( x, y ) bli_teqs( z,z,z, x, y ) + +#define bli_seq1( x ) bli_teq1s( s, x ) +#define bli_deq1( x ) bli_teq1s( d, x ) +#define bli_ceq1( x ) bli_teq1s( c, x ) +#define bli_zeq1( x ) bli_teq1s( z, x ) + +#define bli_seq0( x ) bli_teq0s( s, x ) +#define bli_deq0( x ) bli_teq0s( d, x ) +#define bli_ceq0( x ) bli_teq0s( c, x ) +#define bli_zeq0( x ) bli_teq0s( z, x ) + // -- Notes -------------------------------------------------------------------- #endif diff --git a/frame/include/level0/bli_tscal2s.h b/frame/include/level0/bli_tscal2s.h index 4b0b7e7da1..e5ede99d2d 100644 --- a/frame/include/level0/bli_tscal2s.h +++ b/frame/include/level0/bli_tscal2s.h @@ -561,6 +561,52 @@ PASTECH(bli_tscal2bbs_mxn_,PASTEMAC(chy,dom)) \ } \ } +// -- Legacy macros ------------------------------------------------------------ + +#define bli_sscal2bbs_mxn( conjx, m, n, alpha, x, incx, ldx, y, incy, ldy ) \ + bli_tscal2bbs_mxn( \ + s,s,s,s, \ + conjx, \ + m, \ + n, \ + alpha, \ + x, incx, ldx, \ + y, incy, ldy \ + ) \ + +#define bli_dscal2bbs_mxn( conjx, m, n, alpha, x, incx, ldx, y, incy, ldy ) \ + bli_tscal2bbs_mxn( \ + d,d,d,d, \ + conjx, \ + m, \ + n, \ + alpha, \ + x, incx, ldx, \ + y, incy, ldy \ + ) \ + +#define bli_cscal2bbs_mxn( conjx, m, n, alpha, 
x, incx, ldx, y, incy, ldy ) \ + bli_tscal2bbs_mxn( \ + c,c,c,c, \ + conjx, \ + m, \ + n, \ + alpha, \ + x, incx, ldx, \ + y, incy, ldy \ + ) \ + +#define bli_zscal2bbs_mxn( conjx, m, n, alpha, x, incx, ldx, y, incy, ldy ) \ + bli_tscal2bbs_mxn( \ + z,z,z,z, \ + conjx, \ + m, \ + n, \ + alpha, \ + x, incx, ldx, \ + y, incy, ldy \ + ) \ + // -- Notes -------------------------------------------------------------------- // -- Domain cases -- diff --git a/frame/include/level0/bli_tsets.h b/frame/include/level0/bli_tsets.h index 30ef235719..e4806a3db1 100644 --- a/frame/include/level0/bli_tsets.h +++ b/frame/include/level0/bli_tsets.h @@ -291,4 +291,11 @@ #endif +// -- Legacy macros ------------------------------------------------------------ + +#define bli_sset0s_edge( i, m, j, n, p, ldp ) bli_tset0s_edge( s, i, m, j, n, (float *)(p), ldp ) +#define bli_dset0s_edge( i, m, j, n, p, ldp ) bli_tset0s_edge( d, i, m, j, n, (double *)(p), ldp ) +#define bli_cset0s_edge( i, m, j, n, p, ldp ) bli_tset0s_edge( c, i, m, j, n, (scomplex*)(p), ldp ) +#define bli_zset0s_edge( i, m, j, n, p, ldp ) bli_tset0s_edge( z, i, m, j, n, (dcomplex*)(p), ldp ) + // -- Notes -------------------------------------------------------------------- From ddb9068aeacd3b61ece95d3ae3fc8ab3eae9fe1d Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 26 Feb 2025 14:04:18 -0600 Subject: [PATCH 14/19] Fix some casting problems in level-0 macros. 
--- frame/include/level0/bli_tadds.h | 4 ++-- frame/include/level0/bli_taxpbys.h | 4 ++-- frame/include/level0/bli_tcopys.h | 4 ++-- frame/include/level0/bli_tscal2s.h | 8 ++++---- frame/include/level0/bli_txpbys.h | 20 ++++++++++---------- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/frame/include/level0/bli_tadds.h b/frame/include/level0/bli_tadds.h index a49e8de07e..091708c1c4 100644 --- a/frame/include/level0/bli_tadds.h +++ b/frame/include/level0/bli_tadds.h @@ -145,8 +145,8 @@ for ( dim_t jj = 0; jj < (n); ++jj ) \ for ( dim_t ii = 0; ii < (m); ++ii ) \ { \ - PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_tadds( chx,chy,chc, *xij, *yij ); \ } \ diff --git a/frame/include/level0/bli_taxpbys.h b/frame/include/level0/bli_taxpbys.h index 6bfe6dfa9b..93b3368e00 100644 --- a/frame/include/level0/bli_taxpbys.h +++ b/frame/include/level0/bli_taxpbys.h @@ -257,8 +257,8 @@ for ( dim_t jj = 0; jj < n; ++jj ) \ for ( dim_t ii = 0; ii < m; ++ii ) \ { \ - PASTEMAC(chx,ctype)* restrict xij = x + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = y + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_taxpbys( cha,chx,chb,chy,chc, *(alpha), *xij, *(beta), *yij ); \ } \ diff --git a/frame/include/level0/bli_tcopys.h b/frame/include/level0/bli_tcopys.h index 188db13068..d4b8068e31 100644 --- a/frame/include/level0/bli_tcopys.h +++ b/frame/include/level0/bli_tcopys.h @@ -215,8 +215,8 @@ for ( dim_t jj = 0; jj < (n); ++jj ) \ for ( dim_t ii = 0; ii < (m); ++ii ) \ { \ - PASTEMAC(chx,ctype)* restrict xij = (x) + 
ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_tcopys( chx,chy, *xij, *yij ); \ } \ diff --git a/frame/include/level0/bli_tscal2s.h b/frame/include/level0/bli_tscal2s.h index e5ede99d2d..342727d21c 100644 --- a/frame/include/level0/bli_tscal2s.h +++ b/frame/include/level0/bli_tscal2s.h @@ -491,8 +491,8 @@ PASTECH(bli_tscal2bbs_mxn_,PASTEMAC(chy,dom)) \ for ( dim_t jj = 0; jj < (n); ++jj ) \ for ( dim_t ii = 0; ii < (m); ++ii ) \ { \ - PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_tscal2js( cha,chx,chy,chc, *(alpha), *xij, *yij ); \ } \ @@ -502,8 +502,8 @@ PASTECH(bli_tscal2bbs_mxn_,PASTEMAC(chy,dom)) \ for ( dim_t jj = 0; jj < (n); ++jj ) \ for ( dim_t ii = 0; ii < (m); ++ii ) \ { \ - PASTEMAC(chx,ctype)* restrict xij = (x) + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = (y) + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_tscal2s( cha,chx,chy,chc, *(alpha), *xij, *yij ); \ } \ diff --git a/frame/include/level0/bli_txpbys.h b/frame/include/level0/bli_txpbys.h index 8512a6e799..122ddc4651 100644 --- a/frame/include/level0/bli_txpbys.h +++ b/frame/include/level0/bli_txpbys.h @@ -208,8 +208,8 @@ for ( dim_t jj = 0; jj < n; ++jj ) \ for ( dim_t ii = 0; ii < m; ++ii ) \ { \ - PASTEMAC(chx,ctype)* restrict xij = x + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij 
= y + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_txpbys( chx,chb,chy,chc, *xij, *(beta), *yij ); \ } \ @@ -229,8 +229,8 @@ { \ if ( (doff_t)jj - (doff_t)ii >= (diagoff) ) \ { \ - const PASTEMAC(chx,ctype)* restrict xij = x + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = y + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_tcopys( chx,chy, *xij, *yij ); \ } \ @@ -243,8 +243,8 @@ { \ if ( (doff_t)jj - (doff_t)ii >= (diagoff) ) \ { \ - const PASTEMAC(chx,ctype)* restrict xij = x + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = y + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_txpbys( chx,chb,chy,chc, *xij, *(beta), *yij ); \ } \ @@ -261,8 +261,8 @@ { \ if ( (doff_t)jj - (doff_t)ii <= (diagoff) ) \ { \ - const PASTEMAC(chx,ctype)* restrict xij = x + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = y + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + ii*(rs_y) + jj*(cs_y); \ \ bli_tcopys( chx,chy, *xij, *yij ); \ } \ @@ -275,8 +275,8 @@ { \ if ( (doff_t)jj - (doff_t)ii <= (diagoff) ) \ { \ - const PASTEMAC(chx,ctype)* restrict xij = x + ii*(rs_x) + jj*(cs_x); \ - PASTEMAC(chy,ctype)* restrict yij = y + ii*(rs_y) + jj*(cs_y); \ + PASTEMAC(chx,ctype)* restrict xij = ( PASTEMAC(chx,ctype)* )(x) + ii*(rs_x) + jj*(cs_x); \ + PASTEMAC(chy,ctype)* restrict yij = ( PASTEMAC(chy,ctype)* )(y) + 
ii*(rs_y) + jj*(cs_y); \ \ bli_txpbys( chx,chb,chy,chc, *xij, *(beta), *yij ); \ } \ From 9d1d36836272ee2dd4b4c61e357530d331b8e343 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 26 Feb 2025 14:04:43 -0600 Subject: [PATCH 15/19] Check const-correctness in level-0 macros. Also add some missing in-place tests. --- test/level0/test_tabsq2s.cxx | 4 +- test/level0/test_tabval2s.cxx | 27 +------- test/level0/test_tadd3s.cxx | 24 +++---- test/level0/test_tadds.cxx | 32 +++++----- test/level0/test_taxpbys.cxx | 62 +++++++++--------- test/level0/test_taxpys.cxx | 28 ++++----- test/level0/test_tcopycjs.cxx | 12 ++-- test/level0/test_tcopynzs.cxx | 16 ++--- test/level0/test_tcopys.cxx | 28 ++++----- test/level0/test_teqs.cxx | 20 +++--- test/level0/test_tgets.cxx | 2 +- test/level0/test_tinverts.cxx | 6 +- test/level0/test_tinvscals.cxx | 16 ++--- test/level0/test_tneg2s.cxx | 4 +- test/level0/test_tscal2s.cxx | 112 ++++++++++++++++----------------- test/level0/test_tscalcjs.cxx | 6 +- test/level0/test_tscals.cxx | 40 ++++++------ test/level0/test_tsets.cxx | 20 +++--- test/level0/test_tsqrt2s.cxx | 4 +- test/level0/test_tsubs.cxx | 38 ++++++++--- test/level0/test_txpbys.cxx | 97 +++++++++++++++++----------- 21 files changed, 311 insertions(+), 287 deletions(-) diff --git a/test/level0/test_tabsq2s.cxx b/test/level0/test_tabsq2s.cxx index d938978f99..59fcad91f1 100644 --- a/test/level0/test_tabsq2s.cxx +++ b/test/level0/test_tabsq2s.cxx @@ -45,7 +45,7 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( norm( convert_prec( x ) ) ); \ \ @@ -66,7 +66,7 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, absq2s ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( norm( convert_prec( x ) 
) ); \ \ diff --git a/test/level0/test_tabval2s.cxx b/test/level0/test_tabval2s.cxx index bcce27792c..0209131c6b 100644 --- a/test/level0/test_tabval2s.cxx +++ b/test/level0/test_tabval2s.cxx @@ -45,7 +45,7 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( absolute( convert_prec( x ) ) ); \ \ @@ -66,7 +66,7 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, abval2s ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( absolute( convert_prec( x ) ) ); \ \ @@ -84,26 +84,3 @@ UNIT_TEST(chx,chy,chc,opname) \ ) INSERT_GENTFUNC_MIX3( RC, RC, R, abval2ris ) - -#undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ -UNIT_TEST(chx,chy,chz,chc,opname) \ -( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ - { \ - auto z0 = convert( convert_prec( x ) + \ - convert_prec( y ) ); \ -\ - INFO( "x: " << x ); \ - INFO( "y: " << y ); \ -\ - ctypez z; \ - bli_tadd3s( chx,chy,chz,chc, x, y, z ); \ -\ - INFO( "z (C++): " << z0 ); \ - INFO( "z (BLIS): " << z ); \ -\ - check( z, z0 ); \ - } \ -) diff --git a/test/level0/test_tadd3s.cxx b/test/level0/test_tadd3s.cxx index 5933cabe93..f4c18ad5b7 100644 --- a/test/level0/test_tadd3s.cxx +++ b/test/level0/test_tadd3s.cxx @@ -45,8 +45,8 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ UNIT_TEST(chx,chy,chz,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto z0 = convert( convert_prec( x ) + \ convert_prec( y ) ); \ @@ -70,8 +70,8 @@ INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3s); #define GENTFUNC( opname, ctypex, chx, ctypey, chy, 
ctypec, chc ) \ UNIT_TEST(chx,chy,chz,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) + \ convert_prec( y ) ); \ @@ -87,8 +87,8 @@ UNIT_TEST(chx,chy,chz,chc,opname) \ check( y, y0 ); \ } \ \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto x0 = convert( convert_prec( x ) + \ convert_prec( y ) ); \ @@ -111,8 +111,8 @@ INSERT_GENTFUNC_MIX3(RC, RC, C, add3s_inplace); #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ UNIT_TEST(chx,chy,chz,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto z0 = convert( conj( convert_prec( x ) ) + \ convert_prec( y ) ); \ @@ -136,8 +136,8 @@ INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3js); #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ UNIT_TEST(chx,chy,chz,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto z0 = convert( convert_prec( x ) + \ convert_prec( y ) ); \ @@ -164,8 +164,8 @@ INSERT_GENTFUNC_MIX4(RC, RC, RC, C, add3ris); #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypez, chz, ctypec, chc ) \ UNIT_TEST(chx,chy,chz,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto z0 = convert( conj( convert_prec( x ) ) + \ convert_prec( y ) ); \ diff --git a/test/level0/test_tadds.cxx b/test/level0/test_tadds.cxx index d48512be3b..a2a9bdc10c 100644 --- a/test/level0/test_tadds.cxx +++ b/test/level0/test_tadds.cxx @@ -45,8 +45,8 @@ #define 
GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) + \ convert_prec( y ) ); \ @@ -69,8 +69,8 @@ INSERT_GENTFUNC_MIX3(RC, RC, C, adds); #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( conj( convert_prec( x ) ) + \ convert_prec( y ) ); \ @@ -93,8 +93,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, addjs ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) + \ convert_prec( y ) ); \ @@ -119,8 +119,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, addris ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( conj( convert_prec( x ) ) + \ convert_prec( y ) ); \ @@ -148,11 +148,11 @@ UNIT_TEST(chx,chy,chc,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ + const auto xmn = tile( x ); \ + auto ymn = tile( y ); \ \ INFO( "row-major" ); \ \ @@ -170,11 +170,11 @@ UNIT_TEST(chx,chy,chc,opname) \ check( ymn, ymn0 ); \ } \ \ - for ( auto x : test_values() ) \ - for ( auto y : 
test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ + const auto xmn = tile( x ); \ + auto ymn = tile( y ); \ \ INFO( "column-major" ); \ \ diff --git a/test/level0/test_taxpbys.cxx b/test/level0/test_taxpbys.cxx index d595f25058..410e717248 100644 --- a/test/level0/test_taxpbys.cxx +++ b/test/level0/test_taxpbys.cxx @@ -45,10 +45,10 @@ #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chb,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) + \ @@ -75,9 +75,9 @@ INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbys ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypec, chc ) \ UNIT_TEST(cha,chx,chb,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( auto x : test_values() ) \ + for ( const auto b : test_values() ) \ { \ auto x0 = convert( convert_prec( a ) * \ convert_prec( x ) + \ @@ -103,10 +103,10 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpbys_inplace ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chb,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ conj( 
convert_prec( x ) ) + \ @@ -133,10 +133,10 @@ INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbyjs ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chb,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) + \ @@ -167,10 +167,10 @@ INSERT_GENTFUNC_MIX5( RC, RC, RC, RC, R, axpbyris ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chb,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ conj( convert_prec( x ) ) + \ @@ -204,13 +204,13 @@ UNIT_TEST(cha,chx,chb,chy,chc,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( const auto y : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ + const auto xmn = tile( x ); \ + auto ymn = tile( y ); \ \ INFO( "row-major" ); \ \ @@ -228,13 +228,13 @@ UNIT_TEST(cha,chx,chb,chy,chc,opname) \ check( ymn, ymn0 ); \ } \ \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : 
test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( const auto y : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ + const auto xmn = tile( x ); \ + auto ymn = tile( y ); \ \ INFO( "column-major" ); \ \ diff --git a/test/level0/test_taxpys.cxx b/test/level0/test_taxpys.cxx index bf80277977..87bc238248 100644 --- a/test/level0/test_taxpys.cxx +++ b/test/level0/test_taxpys.cxx @@ -45,9 +45,9 @@ #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) + \ @@ -72,8 +72,8 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpys ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( auto x : test_values() ) \ { \ auto x0 = convert( convert_prec( a ) * \ convert_prec( x ) + \ @@ -97,9 +97,9 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, axpys_inplace ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ conj( convert_prec( x ) ) + \ @@ -124,9 +124,9 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpyjs ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : 
test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) + \ @@ -154,9 +154,9 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, axpyris ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ conj( convert_prec( x ) ) + \ diff --git a/test/level0/test_tcopycjs.cxx b/test/level0/test_tcopycjs.cxx index b113e51590..b152109b4e 100644 --- a/test/level0/test_tcopycjs.cxx +++ b/test/level0/test_tcopycjs.cxx @@ -45,8 +45,8 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( bli_is_conj( conjx ) ? conj( x ) : x ); \ \ @@ -69,8 +69,8 @@ INSERT_GENTFUNC_MIX2( RC, RC, copycjs ) #define GENTFUNC( opname, ctypex, chx ) \ UNIT_TEST(chx,opname) \ ( \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( auto x : test_values() ) \ { \ auto x0 = convert( bli_is_conj( conjx ) ? 
conj( x ) : x ); \ \ @@ -92,8 +92,8 @@ INSERT_GENTFUNC_MIX1( RC, copycjs_inplace ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( bli_is_conj( conjx ) ? conj( x ) : x ); \ \ diff --git a/test/level0/test_tcopynzs.cxx b/test/level0/test_tcopynzs.cxx index ce0b1b4814..d321025ff0 100644 --- a/test/level0/test_tcopynzs.cxx +++ b/test/level0/test_tcopynzs.cxx @@ -45,8 +45,8 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = y; \ real( y0 ) = convert_prec( real( x ) ); \ @@ -71,8 +71,8 @@ INSERT_GENTFUNC_MIX2( RC, RC, copynzs ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = y; \ real( y0 ) = convert_prec( real( x ) ); \ @@ -97,8 +97,8 @@ INSERT_GENTFUNC_MIX2( RC, RC, copyjnzs ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = y; \ real( y0 ) = convert_prec( real( x ) ); \ @@ -125,8 +125,8 @@ INSERT_GENTFUNC_MIX2( RC, RC, copynzris ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = y; \ real( y0 ) = convert_prec( real( x ) ); 
\ diff --git a/test/level0/test_tcopys.cxx b/test/level0/test_tcopys.cxx index a805c93e98..cede1f3414 100644 --- a/test/level0/test_tcopys.cxx +++ b/test/level0/test_tcopys.cxx @@ -45,7 +45,7 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( x ); \ \ @@ -67,7 +67,7 @@ INSERT_GENTFUNC_MIX2( RC, RC, copys ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( conj( x ) ); \ \ @@ -110,7 +110,7 @@ INSERT_GENTFUNC_MIX1( RC, copyjs_inplace ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( x ); \ \ @@ -134,7 +134,7 @@ INSERT_GENTFUNC_MIX2( RC, RC, copyris ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( conj( x ) ); \ \ @@ -158,7 +158,7 @@ INSERT_GENTFUNC_MIX2( RC, RC, copyjris ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto yri0 = convert( x ); \ auto yir0 = convert( swapri( conj( x ) ) ); \ @@ -184,7 +184,7 @@ INSERT_GENTFUNC_MIX2( C, C, copy1es ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto yri0 = convert( conj( x ) ); \ auto yir0 = convert( swapri( x ) ); \ @@ -210,7 +210,7 @@ INSERT_GENTFUNC_MIX2( C, C, copyj1es ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ 
auto y0 = convert( x ); \ \ @@ -232,7 +232,7 @@ INSERT_GENTFUNC_MIX2( C, C, copy1rs ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( conj( x ) ); \ \ @@ -257,10 +257,10 @@ UNIT_TEST(chx,chy,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ + const auto xmn = tile( x ); \ + auto ymn = tile(); \ \ INFO( "row-major" ); \ \ @@ -277,10 +277,10 @@ UNIT_TEST(chx,chy,opname) \ check( ymn, ymn0 ); \ } \ \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ + const auto xmn = tile( x ); \ + auto ymn = tile(); \ \ INFO( "column-major" ); \ \ diff --git a/test/level0/test_teqs.cxx b/test/level0/test_teqs.cxx index 31b58364af..b07df602ca 100644 --- a/test/level0/test_teqs.cxx +++ b/test/level0/test_teqs.cxx @@ -45,8 +45,8 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto expected = convert_prec( x ) == \ convert_prec( y ); \ @@ -69,8 +69,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, eqs ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto expected = convert_prec( x ) == \ convert_prec( y ); \ @@ -95,7 +95,7 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, eqris ) #define GENTFUNC( opname, ctypex, chx ) \ UNIT_TEST(chx,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto expected = 
x == convert_prec( 1.0 ); \ \ @@ -116,7 +116,7 @@ INSERT_GENTFUNC_MIX1( RC, eq1ris ) #define GENTFUNC( opname, ctypex, chx ) \ UNIT_TEST(chx,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto expected = x == convert_prec( 0.0 ); \ \ @@ -137,7 +137,7 @@ INSERT_GENTFUNC_MIX1( RC, eq0ris ) #define GENTFUNC( opname, ctypex, chx ) \ UNIT_TEST(chx,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto expected = x == convert_prec( -1.0 ); \ \ @@ -158,7 +158,7 @@ INSERT_GENTFUNC_MIX1( RC, eqm1ris ) #define GENTFUNC( opname, ctypex, chx ) \ UNIT_TEST(chx,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto expected = x == convert_prec( 1.0 ); \ \ @@ -179,7 +179,7 @@ INSERT_GENTFUNC_MIX1( RC, eq1s ) #define GENTFUNC( opname, ctypex, chx ) \ UNIT_TEST(chx,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto expected = x == convert_prec( 0.0 ); \ \ @@ -200,7 +200,7 @@ INSERT_GENTFUNC_MIX1( RC, eq0s ) #define GENTFUNC( opname, ctypex, chx ) \ UNIT_TEST(chx,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto expected = x == convert_prec( -1.0 ); \ \ diff --git a/test/level0/test_tgets.cxx b/test/level0/test_tgets.cxx index 09aa328e0b..a4c0486f5c 100644 --- a/test/level0/test_tgets.cxx +++ b/test/level0/test_tgets.cxx @@ -49,7 +49,7 @@ UNIT_TEST(chx,chy,opname) \ using ctypeyr = make_real_t; \ using ctypeyc = make_complex_t; \ \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( x ); \ \ diff --git a/test/level0/test_tinverts.cxx b/test/level0/test_tinverts.cxx index 66f72a20a8..180189eed3 100644 --- a/test/level0/test_tinverts.cxx +++ b/test/level0/test_tinverts.cxx @@ -45,7 +45,7 @@ #define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ UNIT_TEST(chx,chc,opname) \ ( \ - for ( auto x : test_values() ) \ 
+ for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( 1.0 ) / \ convert_prec( x ) ); \ @@ -68,7 +68,7 @@ INSERT_GENTFUNC_MIX2( RC, R, inverts ) #define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ UNIT_TEST(chx,chc,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( 1.0 ) / \ convert_prec( x ) ); \ @@ -91,7 +91,7 @@ INSERT_GENTFUNC_MIX2( RC, R, invertris ) #define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ UNIT_TEST(chx,chc,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto xri = x; \ auto xir = swapri( conj( x ) ); \ diff --git a/test/level0/test_tinvscals.cxx b/test/level0/test_tinvscals.cxx index 086b48a859..7631e4f5a8 100644 --- a/test/level0/test_tinvscals.cxx +++ b/test/level0/test_tinvscals.cxx @@ -45,8 +45,8 @@ #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) / \ convert_prec( a ) ); \ @@ -70,8 +70,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, invscals ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) / \ convert_prec( conj( a ) ) ); \ @@ -95,8 +95,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, invscaljs ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) / \ convert_prec( a ) ); \ @@ -122,8 
+122,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, invscalris ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) / \ convert_prec( conj( a ) ) ); \ diff --git a/test/level0/test_tneg2s.cxx b/test/level0/test_tneg2s.cxx index 28aa3e1788..b0c65d7aa3 100644 --- a/test/level0/test_tneg2s.cxx +++ b/test/level0/test_tneg2s.cxx @@ -45,7 +45,7 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( -x ); \ \ @@ -88,7 +88,7 @@ INSERT_GENTFUNC_MIX1( RC, neg2s_inplace ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( -x ); \ \ diff --git a/test/level0/test_tscal2s.cxx b/test/level0/test_tscal2s.cxx index 1cacf7fef0..c0ae8ea997 100644 --- a/test/level0/test_tscal2s.cxx +++ b/test/level0/test_tscal2s.cxx @@ -45,8 +45,8 @@ #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) ); \ @@ -70,8 +70,8 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2s ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( auto x : test_values() ) \ { \ auto x0 = convert( convert_prec( a ) * \ convert_prec( x ) ); \ @@ -94,8 +94,8 @@ INSERT_GENTFUNC_MIX3( RC, 
RC, R, scal2s_inplace ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( conj( x ) ) ); \ @@ -119,8 +119,8 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2js ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) ); \ @@ -147,8 +147,8 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2ris ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( conj( x ) ) ); \ @@ -175,8 +175,8 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, scal2jris ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto yri0 = convert( convert_prec( a ) * \ convert_prec( x ) ); \ @@ -204,8 +204,8 @@ INSERT_GENTFUNC_MIX4( RC, C, C, R, scal21es ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto yri0 = convert( convert_prec( a ) * \ 
convert_prec( conj( x ) ) ); \ @@ -233,8 +233,8 @@ INSERT_GENTFUNC_MIX4( RC, C, C, R, scal2j1es ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) ); \ @@ -258,8 +258,8 @@ INSERT_GENTFUNC_MIX4( RC, C, C, R, scal21rs ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(cha,chx,chy,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( conj( x ) ) ); \ @@ -292,13 +292,13 @@ UNIT_TEST(cha,chx,chy,chc,PASTECH(opname,_,D)) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn00 = tile(); \ - auto ymn = tile(); \ + const auto xmn = tile( x ); \ + auto ymn00 = tile(); \ + auto ymn = tile(); \ \ INFO("column-major"); \ \ @@ -327,12 +327,12 @@ UNIT_TEST(cha,chx,chy,chc,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ + const auto xmn = tile( x ); \ + auto ymn = tile(); \ \ INFO( "row-major" ); \ \ @@ -351,12 +351,12 @@ 
UNIT_TEST(cha,chx,chy,chc,opname) \ check( ymn, ymn0 ); \ } \ \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ + const auto xmn = tile( x ); \ + auto ymn = tile(); \ \ INFO("column-major"); \ \ @@ -385,12 +385,12 @@ UNIT_TEST(cha,chx,chy,chc,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ + const auto xmn = tile( x ); \ + auto ymn = tile(); \ \ INFO( "row-major" ); \ \ @@ -415,12 +415,12 @@ UNIT_TEST(cha,chx,chy,chc,opname) \ check( ymn, ymn0 ); \ } \ \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile(); \ + const auto xmn = tile( x ); \ + auto ymn = tile(); \ \ INFO( "column-major" ); \ \ @@ -457,13 +457,13 @@ UNIT_TEST(cha,chx,chy,chc,opname) \ \ using ctypeyr = make_real_t; \ \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto yrmn = tile(); \ - auto yimn = tile(); \ + const auto xmn = tile( 
x ); \ + auto yrmn = tile(); \ + auto yimn = tile(); \ \ INFO( "row-major" ); \ \ @@ -494,13 +494,13 @@ UNIT_TEST(cha,chx,chy,chc,opname) \ check( yimn, yimn0 ); \ } \ \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto yrmn = tile(); \ - auto yimn = tile(); \ + const auto xmn = tile( x ); \ + auto yrmn = tile(); \ + auto yimn = tile(); \ \ INFO( "column-major" ); \ \ diff --git a/test/level0/test_tscalcjs.cxx b/test/level0/test_tscalcjs.cxx index 058f64c7b6..4351aa13d1 100644 --- a/test/level0/test_tscalcjs.cxx +++ b/test/level0/test_tscalcjs.cxx @@ -45,9 +45,9 @@ #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto conjx : { BLIS_CONJUGATE, BLIS_NO_CONJUGATE } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( bli_is_conj( conjx ) ? 
conj( a ) : a ) * \ convert_prec( x ) ); \ diff --git a/test/level0/test_tscals.cxx b/test/level0/test_tscals.cxx index dd8c550ec6..ffa0e7a2f5 100644 --- a/test/level0/test_tscals.cxx +++ b/test/level0/test_tscals.cxx @@ -45,8 +45,8 @@ #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) ); \ @@ -70,8 +70,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, scals ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( conj( a ) ) * \ convert_prec( x ) ); \ @@ -95,8 +95,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, scaljs ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( a ) * \ convert_prec( x ) ); \ @@ -122,8 +122,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, scalris ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( convert_prec( conj( a ) ) * \ convert_prec( x ) ); \ @@ -149,8 +149,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, scaljris ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : 
test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto xri = x; \ auto xir = swapri( conj( x ) ); \ @@ -182,8 +182,8 @@ INSERT_GENTFUNC_MIX3( RC, C, R, scal1es ) #define GENTFUNC( opname, ctypea, cha, ctypex, chx, ctypec, chc ) \ UNIT_TEST(cha,chx,chc,opname) \ ( \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ + for ( const auto a : test_values() ) \ + for ( auto x : test_values() ) \ { \ auto x0 = convert( convert_prec( a ) * \ convert_prec( x ) ); \ @@ -209,10 +209,10 @@ UNIT_TEST(cha,chx,chc,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto diagoff : { -1, 0, 1 } ) \ + for ( const uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto diagoff : { -1, 0, 1 } ) \ { \ auto xmn = tile( x ); \ \ @@ -241,10 +241,10 @@ UNIT_TEST(cha,chx,chc,opname) \ check( xmn, xmn0 ); \ } \ \ - for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ - for ( auto a : test_values() ) \ - for ( auto x : test_values() ) \ - for ( auto diagoff : { -1, 0, 1 } ) \ + for ( const uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( const auto a : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto diagoff : { -1, 0, 1 } ) \ { \ auto xmn = tile( x ); \ \ diff --git a/test/level0/test_tsets.cxx b/test/level0/test_tsets.cxx index b5f5c789e4..445e648e35 100644 --- a/test/level0/test_tsets.cxx +++ b/test/level0/test_tsets.cxx @@ -45,7 +45,7 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( x ); \ \ @@ -67,8 +67,8 @@ INSERT_GENTFUNC_MIX2( RC, RC, sets ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ 
- for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = y; \ real( y0 ) = convert_prec( real( x ) ); \ @@ -90,8 +90,8 @@ INSERT_GENTFUNC_MIX2( RC, RC, setrs ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = y; \ imag( y0 ) = convert_prec( imag( x ) ); \ @@ -113,7 +113,7 @@ INSERT_GENTFUNC_MIX2( RC, RC, setis ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy ) \ UNIT_TEST(chx,chy,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( x ); \ \ @@ -245,7 +245,7 @@ UNIT_TEST(chy,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto y : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto ymn = tile( y ); \ \ @@ -263,7 +263,7 @@ UNIT_TEST(chy,opname) \ check( ymn, ymn0 ); \ } \ \ - for ( auto y : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto ymn = tile( y ); \ \ @@ -297,7 +297,7 @@ UNIT_TEST(chy,PASTECH(opname,_,D)) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto y : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto ymn = tile( y ); \ \ @@ -336,7 +336,7 @@ UNIT_TEST(chy,PASTECH(opname,_,M,_,N)) \ constexpr auto M0 = 10; \ constexpr auto N0 = 10; \ \ - for ( auto y : test_values() ) \ + for ( const auto y : test_values() ) \ { \ auto ymn = tile( y ); \ \ diff --git a/test/level0/test_tsqrt2s.cxx b/test/level0/test_tsqrt2s.cxx index 070eea9e4e..840531477d 100644 --- a/test/level0/test_tsqrt2s.cxx +++ b/test/level0/test_tsqrt2s.cxx @@ -45,7 +45,7 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( square_root( 
convert_prec( x ) ) ); \ \ @@ -66,7 +66,7 @@ INSERT_GENTFUNC_MIX3( R, R, R, sqrt2s ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ + for ( const auto x : test_values() ) \ { \ auto y0 = convert( square_root( convert_prec( x ) ) ); \ \ diff --git a/test/level0/test_tsubs.cxx b/test/level0/test_tsubs.cxx index 10e27af4eb..3613c32454 100644 --- a/test/level0/test_tsubs.cxx +++ b/test/level0/test_tsubs.cxx @@ -45,8 +45,8 @@ #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( y ) - \ convert_prec( x ) ); \ @@ -66,11 +66,33 @@ UNIT_TEST(chx,chy,chc,opname) \ INSERT_GENTFUNC_MIX3( RC, RC, R, subs ) #undef GENTFUNC -#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +#define GENTFUNC( opname, ctypex, chx, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + { \ + auto x0 = convert( convert_prec( x ) - \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ +\ + bli_tsubs( chx,chx,chc, x, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX2( RC, R, subs_inplace ) + +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ +UNIT_TEST(chx,chy,chc,opname) \ +( \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( y ) - \ conj( convert_prec( x ) ) ); \ @@ -93,8 +115,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, subjs ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : 
test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( y ) - \ convert_prec( x ) ); \ @@ -119,8 +141,8 @@ INSERT_GENTFUNC_MIX3( RC, RC, R, subris ) #define GENTFUNC( opname, ctypex, chx, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( y ) - \ conj( convert_prec( x ) ) ); \ diff --git a/test/level0/test_txpbys.cxx b/test/level0/test_txpbys.cxx index d05ad4bcaf..1d0dbfb43f 100644 --- a/test/level0/test_txpbys.cxx +++ b/test/level0/test_txpbys.cxx @@ -45,9 +45,9 @@ #define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chb,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) + \ convert_prec( b ) * \ @@ -68,13 +68,38 @@ UNIT_TEST(chx,chb,chy,chc,opname) \ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbys ) +#undef GENTFUNC +#define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypec, chc ) \ +UNIT_TEST(chx,chb,chy,chc,opname) \ +( \ + for ( auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + { \ + auto x0 = convert( convert_prec( x ) + \ + convert_prec( b ) * \ + convert_prec( x ) ); \ +\ + INFO( "x: " << x ); \ + INFO( "b: " << b ); \ +\ + bli_txpbys( chx,chb,chx,chc, x, b, x ); \ +\ + INFO( "x (C++): " << x0 ); \ + INFO( "x (BLIS): " << x ); \ +\ + check( x, x0 ); \ + } \ +) + +INSERT_GENTFUNC_MIX3( RC, RC, R, xpbys_inplace ) + #undef GENTFUNC #define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chb,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : 
test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( conj( convert_prec( x ) ) + \ convert_prec( b ) * \ @@ -99,9 +124,9 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbyjs ) #define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chb,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( convert_prec( x ) + \ convert_prec( b ) * \ @@ -130,9 +155,9 @@ INSERT_GENTFUNC_MIX4( RC, RC, RC, R, xpbyris ) #define GENTFUNC( opname, ctypex, chx, ctypeb, chb, ctypey, chy, ctypec, chc ) \ UNIT_TEST(chx,chb,chy,chc,opname) \ ( \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( auto y : test_values() ) \ { \ auto y0 = convert( conj( convert_prec( x ) ) + \ convert_prec( b ) * \ @@ -163,12 +188,12 @@ UNIT_TEST(chx,chb,chy,chc,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( const auto y : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ + const auto xmn = tile( x ); \ + auto ymn = tile( y ); \ \ INFO( "row-major" ); \ \ @@ -187,12 +212,12 @@ UNIT_TEST(chx,chb,chy,chc,opname) \ check( ymn, ymn0 ); \ } \ \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( const auto y : test_values() ) \ { \ - auto xmn = tile( x ); \ - auto 
ymn = tile( y ); \ + const auto xmn = tile( x ); \ + auto ymn = tile( y ); \ \ INFO( "column-major" ); \ \ @@ -220,14 +245,14 @@ UNIT_TEST(chx,chb,chy,chc,opname) \ constexpr auto M = 4; \ constexpr auto N = 4; \ \ - for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - for ( auto diagoff : { -1, 0, 1 } ) \ + for ( const uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( const auto y : test_values() ) \ + for ( const auto diagoff : { -1, 0, 1 } ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ + const auto xmn = tile( x ); \ + auto ymn = tile( y ); \ \ INFO( "row-major" ); \ \ @@ -251,14 +276,14 @@ UNIT_TEST(chx,chb,chy,chc,opname) \ check( ymn, ymn0 ); \ } \ \ - for ( uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ - for ( auto x : test_values() ) \ - for ( auto b : test_values() ) \ - for ( auto y : test_values() ) \ - for ( auto diagoff : { -1, 0, 1 } ) \ + for ( const uplo_t uplo : { BLIS_UPPER, BLIS_LOWER } ) \ + for ( const auto x : test_values() ) \ + for ( const auto b : test_values() ) \ + for ( const auto y : test_values() ) \ + for ( const auto diagoff : { -1, 0, 1 } ) \ { \ - auto xmn = tile( x ); \ - auto ymn = tile( y ); \ + const auto xmn = tile( x ); \ + auto ymn = tile( y ); \ \ INFO( "column-major" ); \ \ From a686d1bbcf1a3bea6a7f2a106d6c0cef260ee1bd Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 26 Feb 2025 15:39:58 -0600 Subject: [PATCH 16/19] Add level-0 macro testing to CI. 
--- .circleci/config.yml | 13 +++++++++---- ci/do_level0.sh | 16 ++++++++++++++++ test/level0/Makefile | 10 +++++----- test/level0/test_l0.cxx | 2 +- test/level0/test_tsubs.cxx | 1 + 5 files changed, 32 insertions(+), 10 deletions(-) create mode 100755 ci/do_level0.sh diff --git a/.circleci/config.yml b/.circleci/config.yml index 339fc11cf0..8c73a0eec2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,6 +30,7 @@ workflows: # OOT: 0 # TEST: FAST # SDE: 0 + # LEVEL0: 0 # THR: none # CONF: auto # BLD: '' @@ -41,20 +42,18 @@ workflows: - build: OOT: 1 TEST: ALL - SDE: 0 CONF: x86_64 # SDE testing for x86_64 - build: # linuxvm must be used because it provides 8G RAM and SDE fails with 4G RAM os: linuxvm - OOT: 0 - TEST: FAST SDE: 1 CONF: x86_64 - # openmp build + # openmp build + LEVEL0 - build: + LEVEL0: 1 THR: openmp # pthreads build @@ -153,6 +152,9 @@ jobs: SDE: type: integer default: 0 + LEVEL0: + type: integer + default: 0 THR: type: string default: none @@ -197,6 +199,7 @@ jobs: export BLD="<< parameters.BLD >>" export LDFLAGS="<< parameters.LDFLAGS >>" export SDE="<< parameters.SDE >>" + export LEVEL0="<< parameters.LEVEL0 >>" export THR="<< parameters.THR >>" export TESTSUITE_WRAPPER="<< parameters.TESTSUITE_WRAPPER >>" @@ -232,6 +235,7 @@ jobs: echo "TEST = $TEST" echo "BLD = $BLD" echo "SDE = $SDE" + echo "LEVEL0 = $LEVEL0" echo "DIST_PATH = $DIST_PATH" echo "LDFLAGS = $LDFLAGS" echo "TESTSUITE_WRAPPER = $TESTSUITE_WRAPPER" @@ -251,3 +255,4 @@ jobs: if [ "$CONF" = "armsve" ]; then sed -i 's/.*\.*/0/' $DIST_PATH/testsuite/input.operations.fast; fi if [ "$TEST" != "0" ]; then $DIST_PATH/ci/do_testsuite.sh; fi if [ "$SDE" = "1" ]; then $DIST_PATH/ci/do_sde.sh; fi + if [ "$LEVEL0" = "1" ]; then $DIST_PATH/ci/do_level0.sh; fi diff --git a/ci/do_level0.sh b/ci/do_level0.sh new file mode 100755 index 0000000000..792e075904 --- /dev/null +++ b/ci/do_level0.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -e +set -x + +: ${SRCDIR:=../..} + +if ! 
[ -d test/level0 ]; then + mkdir -p test/level0 + ln -s $SRCDIR/test/level0/* test/level0/ +fi + +cd test/level0 +make -j2 + +./test_l0.x diff --git a/test/level0/Makefile b/test/level0/Makefile index 1af6dbfb83..a9168cccb3 100644 --- a/test/level0/Makefile +++ b/test/level0/Makefile @@ -62,11 +62,11 @@ LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else -CONFIG_NAME := $(shell grep -E "CONFIG_NAME *:=" ../../config.mk | sed 's/.*:= *//') -DIST_PATH := ../.. -LIB_PATH = ../../lib/$(CONFIG_NAME) -INC_PATH = ../../include/$(CONFIG_NAME) -SHARE_PATH := ../.. +DIST_PATH := ../.. +CONFIG_NAME := $(shell grep -E "CONFIG_NAME *:=" $(DIST_PATH)/config.mk | sed 's/.*:= *//') +LIB_PATH = $(DIST_PATH)/lib/$(CONFIG_NAME) +INC_PATH = $(DIST_PATH)/include/$(CONFIG_NAME) +SHARE_PATH := $(DIST_PATH) endif diff --git a/test/level0/test_l0.cxx b/test/level0/test_l0.cxx index 86af4e0486..fd55c43cf6 100644 --- a/test/level0/test_l0.cxx +++ b/test/level0/test_l0.cxx @@ -43,5 +43,5 @@ unit_test_registrar& get_unit_test_registrar() int main() { - return !get_unit_test_registrar().run_tests(); + return !!get_unit_test_registrar().run_tests(); } diff --git a/test/level0/test_tsubs.cxx b/test/level0/test_tsubs.cxx index 3613c32454..bcc2caa9b5 100644 --- a/test/level0/test_tsubs.cxx +++ b/test/level0/test_tsubs.cxx @@ -60,6 +60,7 @@ UNIT_TEST(chx,chy,chc,opname) \ INFO( "y (BLIS): " << y ); \ \ check( y, y0 ); \ + /* force a failure to make sure CI works */ check( 1.0, 2.0 ); \ } \ ) From 106f5852a63fdbba17ea06734cfaf6f76062401f Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 26 Feb 2025 16:02:05 -0600 Subject: [PATCH 17/19] Error out on missing function declaration. [ci skip] for now. 
--- common.mk | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common.mk b/common.mk index b744a1b7f9..584eb20f43 100644 --- a/common.mk +++ b/common.mk @@ -775,6 +775,11 @@ endif CWARNFLAGS := +# Do not allow functions with implicit definitions to be called +ifneq ($(CC_VENDOR),ibm) +CWARNFLAGS += -Werror=implicit-function-declaration +endif + # Disable unused function warnings and stop compiling on first error for # all compilers that accept such options: gcc, clang, and icc. ifneq ($(CC_VENDOR),ibm) From 7f6d81c3f4498c3ac5c041721288a5957043c265 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 26 Feb 2025 21:09:53 -0600 Subject: [PATCH 18/19] Add more legacy macros needed by x86 kernels. --- frame/include/level0/bli_taxpbys.h | 7 +++++++ frame/include/level0/bli_taxpys.h | 7 +++++++ frame/include/level0/bli_tcopycjs.h | 7 +++++++ frame/include/level0/bli_tcopys.h | 8 ++++++++ frame/include/level0/bli_tdots.h | 7 +++++++ frame/include/level0/bli_tscal2s.h | 7 +++++++ frame/include/level0/bli_tscals.h | 12 ++++++++++++ frame/include/level0/bli_tsets.h | 5 +++++ frame/include/level0/bli_tswaps.h | 8 ++++++++ 9 files changed, 68 insertions(+) diff --git a/frame/include/level0/bli_taxpbys.h b/frame/include/level0/bli_taxpbys.h index 93b3368e00..6fbfc32309 100644 --- a/frame/include/level0/bli_taxpbys.h +++ b/frame/include/level0/bli_taxpbys.h @@ -265,6 +265,13 @@ } \ } +// -- Legacy macros ------------------------------------------------------------ + +#define bli_saxpbys( a, x, b, y ) bli_taxpbys( s,s,s,s,s, a, x, b, y ) +#define bli_daxpbys( a, x, b, y ) bli_taxpbys( d,d,d,d,d, a, x, b, y ) +#define bli_caxpbys( a, x, b, y ) bli_taxpbys( c,c,c,c,s, a, x, b, y ) +#define bli_zaxpbys( a, x, b, y ) bli_taxpbys( z,z,z,z,d, a, x, b, y ) + // -- Notes -------------------------------------------------------------------- #endif diff --git a/frame/include/level0/bli_taxpys.h b/frame/include/level0/bli_taxpys.h index b5b89c678c..df81cc102d 100644 --- 
a/frame/include/level0/bli_taxpys.h +++ b/frame/include/level0/bli_taxpys.h @@ -194,6 +194,13 @@ // -- Higher-level static functions -------------------------------------------- +// -- Legacy macros ------------------------------------------------------------ + +#define bli_saxpys( a, x, y ) bli_taxpys( s,s,s,s, a, x, y ) +#define bli_daxpys( a, x, y ) bli_taxpys( d,d,d,d, a, x, y ) +#define bli_caxpys( a, x, y ) bli_taxpys( c,c,c,s, a, x, y ) +#define bli_zaxpys( a, x, y ) bli_taxpys( z,z,z,d, a, x, y ) + // -- Notes -------------------------------------------------------------------- // -- Domain cases -- diff --git a/frame/include/level0/bli_tcopycjs.h b/frame/include/level0/bli_tcopycjs.h index 538cd5c124..74f843833b 100644 --- a/frame/include/level0/bli_tcopycjs.h +++ b/frame/include/level0/bli_tcopycjs.h @@ -98,6 +98,13 @@ // -- Higher-level static functions -------------------------------------------- +// -- Legacy macros ------------------------------------------------------------ + +#define bli_scopycjs( conj, x, y ) bli_tcopycjs( s,s, conj, x, y ) +#define bli_dcopycjs( conj, x, y ) bli_tcopycjs( d,d, conj, x, y ) +#define bli_ccopycjs( conj, x, y ) bli_tcopycjs( c,c, conj, x, y ) +#define bli_zcopycjs( conj, x, y ) bli_tcopycjs( z,z, conj, x, y ) + // -- Notes -------------------------------------------------------------------- // -- Domain cases -- diff --git a/frame/include/level0/bli_tcopys.h b/frame/include/level0/bli_tcopys.h index d4b8068e31..70722a3d37 100644 --- a/frame/include/level0/bli_tcopys.h +++ b/frame/include/level0/bli_tcopys.h @@ -222,6 +222,14 @@ } \ } +// -- Legacy macros ------------------------------------------------------------ + +#define bli_scopys( x, y ) bli_tcopys( s,s, x, y ) +#define bli_dcopys( x, y ) bli_tcopys( d,d, x, y ) +#define bli_ccopys( x, y ) bli_tcopys( c,c, x, y ) +#define bli_zcopys( x, y ) bli_tcopys( z,z, x, y ) +#define bli_icopys( x, y ) bli_tcopys( i,i, x, y ) + // -- Notes 
-------------------------------------------------------------------- // -- Domain cases -- diff --git a/frame/include/level0/bli_tdots.h b/frame/include/level0/bli_tdots.h index 4a4b0f46b3..b83c4f3b29 100644 --- a/frame/include/level0/bli_tdots.h +++ b/frame/include/level0/bli_tdots.h @@ -73,6 +73,13 @@ // -- Higher-level static functions -------------------------------------------- +// -- Legacy macros ------------------------------------------------------------ + +#define bli_sdots( x, y, a ) bli_tdots( s,s,s,s, x, y, a ) +#define bli_ddots( x, y, a ) bli_tdots( d,d,d,d, x, y, a ) +#define bli_cdots( x, y, a ) bli_tdots( c,c,c,s, x, y, a ) +#define bli_zdots( x, y, a ) bli_tdots( z,z,z,d, x, y, a ) + // -- Notes -------------------------------------------------------------------- // -- Domain cases -- diff --git a/frame/include/level0/bli_tscal2s.h b/frame/include/level0/bli_tscal2s.h index 342727d21c..6243b030d5 100644 --- a/frame/include/level0/bli_tscal2s.h +++ b/frame/include/level0/bli_tscal2s.h @@ -607,6 +607,13 @@ PASTECH(bli_tscal2bbs_mxn_,PASTEMAC(chy,dom)) \ y, incy, ldy \ ) \ +// -- Legacy macros ------------------------------------------------------------ + +#define bli_sscal2s( a, x, y ) bli_tscal2s( s,s,s,s, a, x, y ) +#define bli_dscal2s( a, x, y ) bli_tscal2s( d,d,d,d, a, x, y ) +#define bli_cscal2s( a, x, y ) bli_tscal2s( c,c,c,s, a, x, y ) +#define bli_zscal2s( a, x, y ) bli_tscal2s( z,z,z,d, a, x, y ) + // -- Notes -------------------------------------------------------------------- // -- Domain cases -- diff --git a/frame/include/level0/bli_tscals.h b/frame/include/level0/bli_tscals.h index f0d5e18c5a..30292d6f7a 100644 --- a/frame/include/level0/bli_tscals.h +++ b/frame/include/level0/bli_tscals.h @@ -248,6 +248,18 @@ } \ } +// -- Legacy macros ------------------------------------------------------------ + +#define bli_sscals( a, x ) bli_tscals( s,s,s, a, x ) +#define bli_dscals( a, x ) bli_tscals( d,d,d, a, x ) +#define bli_cscals( a, x ) 
bli_tscals( c,c,s, a, x ) +#define bli_zscals( a, x ) bli_tscals( z,z,d, a, x ) + +#define bli_ssscals( a, x ) bli_tscals( s,s,s, a, x ) +#define bli_ddscals( a, x ) bli_tscals( d,d,d, a, x ) +#define bli_ccscals( a, x ) bli_tscals( c,c,s, a, x ) +#define bli_zzscals( a, x ) bli_tscals( z,z,d, a, x ) + // -- Notes -------------------------------------------------------------------- // -- Domain cases -- diff --git a/frame/include/level0/bli_tsets.h b/frame/include/level0/bli_tsets.h index e4806a3db1..05a86b3f72 100644 --- a/frame/include/level0/bli_tsets.h +++ b/frame/include/level0/bli_tsets.h @@ -293,6 +293,11 @@ // -- Legacy macros ------------------------------------------------------------ +#define bli_sset0s( x ) bli_tset0s( s, x ) +#define bli_dset0s( x ) bli_tset0s( d, x ) +#define bli_cset0s( x ) bli_tset0s( c, x ) +#define bli_zset0s( x ) bli_tset0s( z, x ) + #define bli_sset0s_edge( i, m, j, n, p, ldp ) bli_tset0s_edge( s, i, m, j, n, (float *)(p), ldp ) #define bli_dset0s_edge( i, m, j, n, p, ldp ) bli_tset0s_edge( d, i, m, j, n, (double *)(p), ldp ) #define bli_cset0s_edge( i, m, j, n, p, ldp ) bli_tset0s_edge( c, i, m, j, n, (scomplex*)(p), ldp ) diff --git a/frame/include/level0/bli_tswaps.h b/frame/include/level0/bli_tswaps.h index 85fb24bb4e..fd7f4dfebd 100644 --- a/frame/include/level0/bli_tswaps.h +++ b/frame/include/level0/bli_tswaps.h @@ -106,6 +106,14 @@ // -- Higher-level static functions -------------------------------------------- +// -- Legacy macros ------------------------------------------------------------ + + +#define bli_sswaps( x, y ) bli_tswaps( s,s, x, y ) +#define bli_dswaps( x, y ) bli_tswaps( d,d, x, y ) +#define bli_cswaps( x, y ) bli_tswaps( c,c, x, y ) +#define bli_zswaps( x, y ) bli_tswaps( z,z, x, y ) + // -- Notes -------------------------------------------------------------------- // -- Domain cases -- From 78bf23a873ca30bd907880f634613d03574e3deb Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 26 Feb 2025 
21:33:34 -0600 Subject: [PATCH 19/19] One last legacy macro, and disable check for CI correctness. --- frame/include/level0/bli_tinverts.h | 7 +++++++ test/level0/test_tsubs.cxx | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/frame/include/level0/bli_tinverts.h b/frame/include/level0/bli_tinverts.h index c797fa3f18..701a335999 100644 --- a/frame/include/level0/bli_tinverts.h +++ b/frame/include/level0/bli_tinverts.h @@ -213,6 +213,13 @@ // -- Higher-level static functions -------------------------------------------- +// -- Legacy macros ------------------------------------------------------------ + +#define bli_sinverts( x ) bli_tinverts( s,s, x ) +#define bli_dinverts( x ) bli_tinverts( d,d, x ) +#define bli_cinverts( x ) bli_tinverts( c,c, x ) +#define bli_zinverts( x ) bli_tinverts( z,z, x ) + // -- Notes -------------------------------------------------------------------- #endif diff --git a/test/level0/test_tsubs.cxx b/test/level0/test_tsubs.cxx index bcc2caa9b5..3613c32454 100644 --- a/test/level0/test_tsubs.cxx +++ b/test/level0/test_tsubs.cxx @@ -60,7 +60,6 @@ UNIT_TEST(chx,chy,chc,opname) \ INFO( "y (BLIS): " << y ); \ \ check( y, y0 ); \ - /* force a failure to make sure CI works */ check( 1.0, 2.0 ); \ } \ )