From f856cace50a0b7fb9c78a6337b869da151fd1a78 Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 07:55:37 -0500 Subject: [PATCH 01/21] initial import --- Makefile | 1 + build/bli_config.h.in | 9 ++ common.mk | 12 +++ config/generic/bli_cntx_init_generic.c | 1 + configure | 35 ++++++- frame/include/bli_type_defs.h | 4 +- frame/thread/bli_thrcomm.h | 8 ++ frame/thread/bli_thrcomm_hpx.cpp | 89 ++++++++++++++++++ frame/thread/bli_thrcomm_hpx.hpp | 57 ++++++++++++ frame/thread/bli_thread.c | 12 +++ frame/thread/bli_thread.h | 1 + frame/thread/bli_thread_hpx.cpp | 122 +++++++++++++++++++++++++ frame/thread/bli_thread_hpx.hpp | 52 +++++++++++ 13 files changed, 400 insertions(+), 3 deletions(-) create mode 100644 frame/thread/bli_thrcomm_hpx.cpp create mode 100644 frame/thread/bli_thrcomm_hpx.hpp create mode 100644 frame/thread/bli_thread_hpx.cpp create mode 100644 frame/thread/bli_thread_hpx.hpp diff --git a/Makefile b/Makefile index 04cdca4214..8869b8d2e5 100644 --- a/Makefile +++ b/Makefile @@ -503,6 +503,7 @@ flat-header: check-env $(BLIS_H_FLAT) $(BLIS_H_FLAT): $(ALL_H99_FILES) ifeq ($(ENABLE_VERBOSE),yes) + @echo -n "**HERE->***\t$(FLATTEN_H)" $(FLATTEN_H) -l -v1 $(BLIS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" else @echo -n "Generating monolithic blis.h" diff --git a/build/bli_config.h.in b/build/bli_config.h.in index 5208a90f81..716b6e22fc 100644 --- a/build/bli_config.h.in +++ b/build/bli_config.h.in @@ -65,6 +65,15 @@ #endif #endif +#if @enable_hpx@ +#define BLIS_ENABLE_HPX +#if @enable_hpx_as_def@ +#define BLIS_ENABLE_HPX_AS_DEFAULT +#endif +#endif + + + #if @enable_jrir_slab@ #define BLIS_ENABLE_JRIR_SLAB #endif diff --git a/common.mk b/common.mk index e69b977824..0355b16987 100644 --- a/common.mk +++ b/common.mk @@ -836,6 +836,10 @@ ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +CTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +LDFLAGS += `pkg-config --libs hpx_application_debug` +endif endif ifeq ($(CC_VENDOR),icc) @@ -850,6 +854,10 @@ ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +CTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +LDFLAGS += `pkg-config --libs hpx_application_debug` +endif endif ifeq ($(CC_VENDOR),clang) @@ -864,6 +872,10 @@ ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +CTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +LDFLAGS += `pkg-config --libs hpx_application_debug` +endif endif # --- #pragma omp simd flags (used for reference kernels only) --- diff --git a/config/generic/bli_cntx_init_generic.c b/config/generic/bli_cntx_init_generic.c index 70bbb80f96..287c5ef3e0 100644 --- a/config/generic/bli_cntx_init_generic.c +++ b/config/generic/bli_cntx_init_generic.c @@ -33,6 +33,7 @@ */ #include "blis.h" +#include "bli_type_defs.h" void bli_cntx_init_generic( cntx_t* cntx ) { diff --git a/configure b/configure index 37399fbde2..7406bdc4c4 100755 --- a/configure +++ b/configure @@ -175,7 +175,7 @@ print_usage() echo " BLIS, and the choice of which to use will be determined at" echo " runtime. If the user does not express a preference (by" echo " setting the BLIS_THREAD_IMPL environment variable to" - echo " 'single', 'openmp', or 'pthreads'; by calling the global" + echo " 'single', 'openmp', 'pthreads', or 'hpx'; by calling the global" echo " runtime API bli_thread_set_thread_impl(); or by encoding a" echo " choice on a per-call basis within a rntm_t passed into the" echo " expert API), then the first model listed in MODEL will be" @@ -3487,14 +3487,17 @@ main() enable_single='yes' enable_openmp='no' enable_pthreads='no' + enable_hpx='no' enable_single_01=1 enable_openmp_01=0 enable_pthreads_01=0 + enable_hpx_01=0 parsed_tm='' first_tm='' enable_single_as_def_01=0 enable_openmp_as_def_01=0 enable_pthreads_as_def_01=0 + enable_hpx_as_def_01=0 # Convert whatever reasonable separator the user may have used into a space. threading_model_list=$(echo "${threading_model}" | sed -e "s/[,+]/ /g") @@ -3522,6 +3525,11 @@ main() parsed_tm="${parsed_tm} pthreads" + elif [ "x${word}" = "xhpx" ]; then + + parsed_tm="${parsed_tm} hpx" + + elif [ "x${word}" = "xauto" ]; then parsed_tm="${parsed_tm} auto" @@ -3613,7 +3621,14 @@ main() echo "${script_name}: enabling support for threading via pthreads." enable_pthreads='yes' enable_pthreads_01=1 + + elif [ "x${word}" = "xhpx" ]; then + + echo "${script_name}: enabling support for threading via hpx." + enable_hpx='yes' + enable_hpx_01=1 fi + done # Define boolean variables that can easily be interpreted with #ifdef @@ -3623,25 +3638,37 @@ main() enable_single_as_def_01=1 enable_openmp_as_def_01=0 enable_pthreads_as_def_01=0 + enable_hpx_as_def_01=0 elif [ "x${first_tm}" = "xopenmp" ]; then enable_single_as_def_01=0 enable_openmp_as_def_01=1 enable_pthreads_as_def_01=0 + enable_hpx_as_def_01=0 elif [ "x${first_tm}" = "xpthreads" ]; then enable_single_as_def_01=0 enable_openmp_as_def_01=0 enable_pthreads_as_def_01=1 + enable_hpx_as_def_01=0 + + elif [ "x${first_tm}" = "xhpx" ]; then + + enable_single_as_def_01=0 + enable_openmp_as_def_01=0 + enable_pthreads_as_def_01=0 + enable_hpx_as_def_01=1 + fi # If either OpenMP or pthreads was enabled, given that single-threaded mode is # also always enabled, remind the user which one will serve as the default # (that is, absent any explicit choice at runtime). if [ "x${enable_openmp}" = "xyes" ] || - [ "x${enable_pthreads}" = "xyes" ]; then + [ "x${enable_pthreads}" = "xyes" ] || + [ "x${enable_hpx}" = "xyes" ]; then if [ "x${first_tm}" = "xsingle" ]; then echo "${script_name}: threading will default to single-threaded." @@ -3649,6 +3676,8 @@ main() echo "${script_name}: threading will default to OpenMP." elif [ "x${first_tm}" = "xpthreads" ]; then echo "${script_name}: threading will default to pthreads." + elif [ "x${first_tm}" = "xhpx" ]; then + echo "${script_name}: threading will default to HPX." fi fi @@ -4103,6 +4132,8 @@ main() | sed -e "s/@enable_openmp_as_def@/${enable_openmp_as_def_01}/g" \ | sed -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \ | sed -e "s/@enable_pthreads_as_def@/${enable_pthreads_as_def_01}/g" \ + | sed -e "s/@enable_hpx@/${enable_hpx_01}/g" \ + | sed -e "s/@enable_hpx_as_def@/${enable_hpx_as_def_01}/g" \ | sed -e "s/@enable_jrir_slab@/${enable_jrir_slab_01}/g" \ | sed -e "s/@enable_jrir_rr@/${enable_jrir_rr_01}/g" \ | sed -e "s/@enable_pba_pools@/${enable_pba_pools_01}/g" \ diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 0c5d11e6b6..014be18b77 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -44,9 +44,10 @@ #ifdef __cplusplus // For C++, include stdint.h. - #include + #include #elif __STDC_VERSION__ >= 199901L // For C99 (or later), include stdint.h. + #include #include #include #else @@ -629,6 +630,7 @@ typedef enum BLIS_SINGLE = 0, BLIS_OPENMP, BLIS_POSIX, + BLIS_HPX, // BLIS_NUM_THREAD_IMPLS must be last! BLIS_NUM_THREAD_IMPLS diff --git a/frame/thread/bli_thrcomm.h b/frame/thread/bli_thrcomm.h index 7abd190c76..94f37aee4b 100644 --- a/frame/thread/bli_thrcomm.h +++ b/frame/thread/bli_thrcomm.h @@ -94,6 +94,13 @@ typedef struct thrcomm_s #endif #endif + #ifdef BLIS_ENABLE_PTHREADS + #ifdef BLIS_USE_PTHREAD_BARRIER + hpx::barrier<> * barrier; + #endif + #endif + + } thrcomm_t; @@ -105,6 +112,7 @@ typedef struct thrcomm_s #include "bli_thrcomm_single.h" #include "bli_thrcomm_openmp.h" #include "bli_thrcomm_pthreads.h" +#include "bli_thrcomm_hpx.hpp" // Define a function pointer type for each of the functions that are // "overloaded" by each method of multithreading. diff --git a/frame/thread/bli_thrcomm_hpx.cpp b/frame/thread/bli_thrcomm_hpx.cpp new file mode 100644 index 0000000000..92e4281a84 --- /dev/null +++ b/frame/thread/bli_thrcomm_hpx.cpp @@ -0,0 +1,89 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#ifdef BLIS_ENABLE_HPX + +#ifdef BLIS_USE_HPX_BARRIER + +// Define the pthread_barrier_t implementations of the init, cleanup, and +// barrier functions. + +void bli_thrcomm_init_hpx( dim_t n_threads, thrcomm_t* comm ) +{ + if ( comm == NULL ) return; + comm->barrier = new hpx:barrier<>(); +} + +void bli_thrcomm_cleanup_hpx( thrcomm_t* comm ) +{ + if ( comm == NULL ) return; + delete comm->barrier; +} + +void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm ) +{ + comm->barrier->arrive_and_wait(); +} + +#else + +// Define the non-pthread_barrier_t implementations of the init, cleanup, +// and barrier functions. These are the default unless the pthread_barrier_t +// versions are requested at compile-time. + +void bli_thrcomm_init_hpx( dim_t n_threads, thrcomm_t* comm ) +{ + if ( comm == NULL ) return; + comm->sent_object = NULL; + comm->n_threads = n_threads; + comm->barrier_sense = 0; + comm->barrier_threads_arrived = 0; +} + +void bli_thrcomm_cleanup_hpx( thrcomm_t* comm ) +{ +} + +void bli_thrcomm_barrier_hpx( dim_t t_id, thrcomm_t* comm ) +{ + bli_thrcomm_barrier_atomic( t_id, comm ); +} + +#endif + +#endif + diff --git a/frame/thread/bli_thrcomm_hpx.hpp b/frame/thread/bli_thrcomm_hpx.hpp new file mode 100644 index 0000000000..9071249609 --- /dev/null +++ b/frame/thread/bli_thrcomm_hpx.hpp @@ -0,0 +1,57 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_THRCOMM_HPX_H +#define BLIS_THRCOMM_HPX_H + +// Define these prototypes for situations when POSIX multithreading is enabled. +#ifdef BLIS_ENABLE_HPX + +#ifdef __cplusplus +extern "C" { +#endif + +// pthreads-specific function prototypes. +void bli_thrcomm_init_hpx( dim_t nt, thrcomm_t* comm ); +void bli_thrcomm_cleanup_hpx( thrcomm_t* comm ); +void bli_thrcomm_barrier_hpx( dim_t tid, thrcomm_t* comm ); + +#ifdef __cplusplus +} +#endif + +#endif + +#endif + diff --git a/frame/thread/bli_thread.c b/frame/thread/bli_thread.c index 8904c88e3b..0b0daad98c 100644 --- a/frame/thread/bli_thread.c +++ b/frame/thread/bli_thread.c @@ -35,6 +35,10 @@ #include "blis.h" +#ifdef BLIS_ENABLE_HPX +#include "bli_thread_hpx.hpp" +#endif + thrcomm_t BLIS_SINGLE_COMM = {}; // The global rntm_t structure. (The definition resides in bli_rntm.c.) @@ -69,6 +73,14 @@ static thread_launch_t thread_launch_fpa[ BLIS_NUM_THREAD_IMPLS ] = NULL, #else NULL, +#endif + [BLIS_HPX] = +#if defined(BLIS_ENABLE_OPENMP) + bli_thread_launch_hpx, +#elif defined(BLIS_ENABLE_PTHREADS) + NULL, +#else + NULL, #endif }; diff --git a/frame/thread/bli_thread.h b/frame/thread/bli_thread.h index 821e2fe7c0..45e7142dc1 100644 --- a/frame/thread/bli_thread.h +++ b/frame/thread/bli_thread.h @@ -49,6 +49,7 @@ typedef void (*thread_func_t)( thrcomm_t* gl_comm, dim_t tid, const void* params // Include threading implementations. #include "bli_thread_openmp.h" #include "bli_thread_pthreads.h" +#include "bli_thread_hpx.hpp" #include "bli_thread_single.h" // Initialization-related prototypes. diff --git a/frame/thread/bli_thread_hpx.cpp b/frame/thread/bli_thread_hpx.cpp new file mode 100644 index 0000000000..f8c05e9919 --- /dev/null +++ b/frame/thread/bli_thread_hpx.cpp @@ -0,0 +1,122 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#ifdef BLIS_ENABLE_HPX + +#include +#include + +// A data structure to assist in passing operands to additional threads. +typedef struct thread_data +{ + dim_t tid; + thrcomm_t* gl_comm; + thread_func_t func; + const void* params; +} thread_data_t; + +// Entry point for additional threads +static void* bli_hpx_thread_entry( void* data_void ) +{ + const thread_data_t* data = data_void; + + const dim_t tid = data->tid; + thrcomm_t* gl_comm = data->gl_comm; + thread_func_t func = data->func; + const void* params = data->params; + + // Call the thread entry point, passing the global communicator, the + // thread id, and the params struct as arguments. + func( gl_comm, tid, params ); + + return NULL; +} + +void bli_thread_launch_hpx( dim_t n_threads, thread_func_t func, const void* params ) +{ + err_t r_val; + + const timpl_t ti = BLIS_HPX; + + // Allocate a global communicator for the root thrinfo_t structures. + pool_t* gl_comm_pool = NULL; + thrcomm_t* gl_comm = bli_thrcomm_create( ti, gl_comm_pool, n_threads ); + + // Allocate an array of pthread objects and auxiliary data structs to pass + // to the thread entry functions. + + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_l3_thread_decorator().pth: " ); + #endif + bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads, &r_val ); + + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_l3_thread_decorator().pth: " ); + #endif + thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val ); + + // NOTE: We must iterate backwards so that the chief thread (thread id 0) + // can spawn all other threads before proceeding with its own computation. + auto irange = hpx::util::detail::make_counting_shape(num_seg); + + hpx::for_each(hpx::execution::par, hpx::util::begin(irange), hpx::util::end(irange), + [&datas, &gl_comm, &func, ¶ms](const dim_t tid) { + // Set up thread data for additional threads (beyond thread 0). + datas[tid].tid = tid; + datas[tid].gl_comm = gl_comm; + datas[tid].func = func; + datas[tid].params = params; + + bli_hpx_thread_entry(&datas[0]); + }); + + // Free the global communicator, because the root thrinfo_t node + // never frees its communicator. + bli_thrcomm_free( gl_comm_pool, gl_comm ); + + // Free the array of pthread objects and auxiliary data structs. + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_l3_thread_decorator().pth: " ); + #endif + bli_free_intl( pthreads ); + + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_l3_thread_decorator().pth: " ); + #endif + bli_free_intl( datas ); +} + +#endif diff --git a/frame/thread/bli_thread_hpx.hpp b/frame/thread/bli_thread_hpx.hpp new file mode 100644 index 0000000000..f42364c1bf --- /dev/null +++ b/frame/thread/bli_thread_hpx.hpp @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_THREAD_HPX_H +#define BLIS_THREAD_HPX_H + +// Definitions specific to situations when POSIX multithreading is enabled. +#ifdef BLIS_ENABLE_HPX + +#include "bli_type_defs.h" +#include "bli_thread.h" + +extern "C" { + +void bli_thread_launch_hpx(dim_t n_threads, thread_func_t func, const void* params); + +} + +#endif + +#endif From c7a7b00664e54785a2d2b5d96d9f3821801d1ad6 Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 09:06:10 -0500 Subject: [PATCH 02/21] initial import --- Makefile | 3 +- common.mk | 24 ++++++++-- frame/thread/bli_thread.h | 2 +- frame/thread/bli_thread_hpx.h | 45 +++++++++++++++++++ ...thread_hpx.cpp => bli_thread_hpx_impl.cpp} | 8 ++++ ...thread_hpx.hpp => bli_thread_hpx_impl.hpp} | 11 ++--- 6 files changed, 82 insertions(+), 11 deletions(-) create mode 100644 frame/thread/bli_thread_hpx.h rename frame/thread/{bli_thread_hpx.cpp => bli_thread_hpx_impl.cpp} (98%) rename frame/thread/{bli_thread_hpx.hpp => bli_thread_hpx_impl.hpp} (94%) diff --git a/Makefile b/Makefile index 8869b8d2e5..37026022cf 100644 --- a/Makefile +++ b/Makefile @@ -503,7 +503,6 @@ flat-header: check-env $(BLIS_H_FLAT) $(BLIS_H_FLAT): $(ALL_H99_FILES) ifeq ($(ENABLE_VERBOSE),yes) - @echo -n "**HERE->***\t$(FLATTEN_H)" $(FLATTEN_H) -l -v1 $(BLIS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" else @echo -n "Generating monolithic blis.h" @@ -547,6 +546,8 @@ endef # config_name, used to look up the CFLAGS to use during compilation. define make-frame-rule $(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) +$(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.cpp $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) + ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@ else diff --git a/common.mk b/common.mk index 0355b16987..eab31a9e04 100644 --- a/common.mk +++ b/common.mk @@ -112,6 +112,7 @@ get-noopt-cxxflags-for = $(strip $(CFLAGS_PRESET) \ $(call load-var-for,CXXLANGFLAGS,$(1)) \ $(call load-var-for,CPPROCFLAGS,$(1)) \ $(CTHREADFLAGS) \ + $(CXXTHREADFLAGS) \ $(CINCFLAGS) $(VERS_DEF) \ ) @@ -348,7 +349,7 @@ REFNM := ref # Source suffixes. CONFIG_SRC_SUFS := c KERNELS_SRC_SUFS := c s S -FRAME_SRC_SUFS := c +FRAME_SRC_SUFS := c cpp ADDON_C99_SUFS := c ADDON_CXX_SUFS := cc cpp cxx @@ -837,8 +838,13 @@ CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) -CTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +ifeq ($(debug_flag),1) +CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` LDFLAGS += `pkg-config --libs hpx_application_debug` +else +CXXTHREADFLAGS += `pkg-config --cflags hpx_application` +LDFLAGS += `pkg-config --libs hpx_application` +endif endif endif @@ -855,8 +861,13 @@ CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) -CTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +ifeq ($(debug_flag),1) +CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` LDFLAGS += `pkg-config --libs hpx_application_debug` +else +CXXTHREADFLAGS += `pkg-config --cflags hpx_application` +LDFLAGS += `pkg-config --libs hpx_application` +endif endif endif @@ -873,8 +884,13 @@ CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) -CTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +ifeq ($(debug_flag),1) +CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` LDFLAGS += `pkg-config --libs hpx_application_debug` +else +CXXTHREADFLAGS += `pkg-config --cflags hpx_application` +LDFLAGS += `pkg-config --libs hpx_application` +endif endif endif diff --git a/frame/thread/bli_thread.h b/frame/thread/bli_thread.h index 45e7142dc1..e61fc8b892 100644 --- a/frame/thread/bli_thread.h +++ b/frame/thread/bli_thread.h @@ -49,7 +49,7 @@ typedef void (*thread_func_t)( thrcomm_t* gl_comm, dim_t tid, const void* params // Include threading implementations. #include "bli_thread_openmp.h" #include "bli_thread_pthreads.h" -#include "bli_thread_hpx.hpp" +#include "bli_thread_hpx.h" #include "bli_thread_single.h" // Initialization-related prototypes. diff --git a/frame/thread/bli_thread_hpx.h b/frame/thread/bli_thread_hpx.h new file mode 100644 index 0000000000..98887c7927 --- /dev/null +++ b/frame/thread/bli_thread_hpx.h @@ -0,0 +1,45 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_THREAD_HPX_H +#define BLIS_THREAD_HPX_H + +// Definitions specific to situations when POSIX multithreading is enabled. +#ifdef BLIS_ENABLE_HPX + +#include "bli_thread_hpx_impl.hpp" + +#endif + +#endif diff --git a/frame/thread/bli_thread_hpx.cpp b/frame/thread/bli_thread_hpx_impl.cpp similarity index 98% rename from frame/thread/bli_thread_hpx.cpp rename to frame/thread/bli_thread_hpx_impl.cpp index f8c05e9919..f7a0194da6 100644 --- a/frame/thread/bli_thread_hpx.cpp +++ b/frame/thread/bli_thread_hpx_impl.cpp @@ -65,6 +65,10 @@ static void* bli_hpx_thread_entry( void* data_void ) return NULL; } +#ifdef __cplusplus +extern "C" { +#endif + void bli_thread_launch_hpx( dim_t n_threads, thread_func_t func, const void* params ) { err_t r_val; @@ -119,4 +123,8 @@ void bli_thread_launch_hpx( dim_t n_threads, thread_func_t func, const void* par bli_free_intl( datas ); } +#ifdef __cplusplus +} // end extern "C" +#endif + #endif diff --git a/frame/thread/bli_thread_hpx.hpp b/frame/thread/bli_thread_hpx_impl.hpp similarity index 94% rename from frame/thread/bli_thread_hpx.hpp rename to frame/thread/bli_thread_hpx_impl.hpp index f42364c1bf..a21ea466e8 100644 --- a/frame/thread/bli_thread_hpx.hpp +++ b/frame/thread/bli_thread_hpx_impl.hpp @@ -32,20 +32,21 @@ */ -#ifndef BLIS_THREAD_HPX_H -#define BLIS_THREAD_HPX_H +#ifndef BLIS_THREAD_HPX_IMPL_H +#define BLIS_THREAD_HPX_IMPL_H // Definitions specific to situations when POSIX multithreading is enabled. #ifdef BLIS_ENABLE_HPX -#include "bli_type_defs.h" -#include "bli_thread.h" - +#ifdef __cplusplus extern "C" { +#endif void bli_thread_launch_hpx(dim_t n_threads, thread_func_t func, const void* params); +#ifdef __cplusplus } +#endif #endif From 6bdbe170ad7bc03393b605da0ffe0726bcd1266b Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 14:53:09 -0500 Subject: [PATCH 03/21] initial import --- Makefile | 12 ++++++-- build/config.mk.in | 1 + common.mk | 29 +++++++++++-------- configure | 3 ++ frame/thread/bli_thrcomm.h | 2 +- ...rcomm_hpx.cpp => bli_thrcomm_hpx_impl.cpp} | 16 ++++++++++ ...rcomm_hpx.hpp => bli_thrcomm_hpx_impl.hpp} | 4 +-- frame/thread/bli_thread.c | 6 ++-- frame/thread/bli_thread_hpx_impl.cpp | 8 ++--- 9 files changed, 56 insertions(+), 25 deletions(-) rename frame/thread/{bli_thrcomm_hpx.cpp => bli_thrcomm_hpx_impl.cpp} (95%) rename frame/thread/{bli_thrcomm_hpx.hpp => bli_thrcomm_hpx_impl.hpp} (97%) diff --git a/Makefile b/Makefile index 37026022cf..87356f9978 100644 --- a/Makefile +++ b/Makefile @@ -546,14 +546,22 @@ endef # config_name, used to look up the CFLAGS to use during compilation. define make-frame-rule $(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) -$(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.cpp $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) - ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-frame-text-for,$(1)) @$(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@ endif + +$(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.cpp $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) +ifeq ($(ENABLE_HPX),yes) +ifeq ($(ENABLE_VERBOSE),yes) + $(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ +else + @echo "Compiling $$@" $(call get-addon-cxxtext-for,$(1)) + @$(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ +endif +endif endef # first argument: a kernel set (name) being targeted (e.g. haswell). diff --git a/build/config.mk.in b/build/config.mk.in index efb123366b..4624220cf0 100644 --- a/build/config.mk.in +++ b/build/config.mk.in @@ -123,6 +123,7 @@ LDFLAGS_PRESET := @ldflags_preset@ # The level of debugging info to generate. DEBUG_TYPE := @debug_type@ +ENABLE_DEBUG := @enable_debug@ # Whether to compile and link the AddressSanitizer library. MK_ENABLE_ASAN := @enable_asan@ diff --git a/common.mk b/common.mk index eab31a9e04..0da4817e2b 100644 --- a/common.mk +++ b/common.mk @@ -427,6 +427,7 @@ KERNELS_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(KERNELS_DIR) ADDON_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(ADDON_DIR) SANDBOX_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(SANDBOX_DIR) +ENABLE_HPX := no # @@ -824,6 +825,7 @@ endif # gets added to begin with. CTHREADFLAGS := +CXXTHREADFLAGS := ifeq ($(CC_VENDOR),gcc) #ifneq ($(findstring auto,$(THREADING_MODEL)),) @@ -838,12 +840,13 @@ CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) -ifeq ($(debug_flag),1) -CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` -LDFLAGS += `pkg-config --libs hpx_application_debug` -else +ENABLE_HPX := yes +ifneq ($(findstring yes,$(ENABLE_DEBUG)),) CXXTHREADFLAGS += `pkg-config --cflags hpx_application` LDFLAGS += `pkg-config --libs hpx_application` +else +CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +LDFLAGS += `pkg-config --libs hpx_application_debug` endif endif endif @@ -861,12 +864,13 @@ CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) -ifeq ($(debug_flag),1) -CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` -LDFLAGS += `pkg-config --libs hpx_application_debug` -else +ENABLE_HPX := yes +ifneq ($(findstring yes,$(ENABLE_DEBUG)),) CXXTHREADFLAGS += `pkg-config --cflags hpx_application` LDFLAGS += `pkg-config --libs hpx_application` +else +CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +LDFLAGS += `pkg-config --libs hpx_application_debug` endif endif endif @@ -884,12 +888,13 @@ CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) -ifeq ($(debug_flag),1) -CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` -LDFLAGS += `pkg-config --libs hpx_application_debug` -else +ENABLE_HPX := yes +ifneq ($(findstring yes,$(ENABLE_DEBUG)),) CXXTHREADFLAGS += `pkg-config --cflags hpx_application` LDFLAGS += `pkg-config --libs hpx_application` +else +CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` +LDFLAGS += `pkg-config --libs hpx_application_debug` endif endif endif diff --git a/configure b/configure index 7406bdc4c4..ee548c3eac 100755 --- a/configure +++ b/configure @@ -2478,6 +2478,7 @@ main() # The user-given debug type and a flag indicating it was given. debug_type='' debug_flag='' + enable_debug='no' # A flag indicating whether AddressSanitizer should be used. enable_asan='no' @@ -3422,8 +3423,10 @@ main() debug_type='noopt' echo "${script_name}: enabling debug symbols; optimizations disabled." fi + enable_debug='yes' else debug_type='off' + enable_debug='no' echo "${script_name}: debug symbols disabled." fi diff --git a/frame/thread/bli_thrcomm.h b/frame/thread/bli_thrcomm.h index 94f37aee4b..86b81e8c48 100644 --- a/frame/thread/bli_thrcomm.h +++ b/frame/thread/bli_thrcomm.h @@ -112,7 +112,7 @@ typedef struct thrcomm_s #include "bli_thrcomm_single.h" #include "bli_thrcomm_openmp.h" #include "bli_thrcomm_pthreads.h" -#include "bli_thrcomm_hpx.hpp" +#include "bli_thrcomm_hpx.h" // Define a function pointer type for each of the functions that are // "overloaded" by each method of multithreading. diff --git a/frame/thread/bli_thrcomm_hpx.cpp b/frame/thread/bli_thrcomm_hpx_impl.cpp similarity index 95% rename from frame/thread/bli_thrcomm_hpx.cpp rename to frame/thread/bli_thrcomm_hpx_impl.cpp index 92e4281a84..95591c855a 100644 --- a/frame/thread/bli_thrcomm_hpx.cpp +++ b/frame/thread/bli_thrcomm_hpx_impl.cpp @@ -39,6 +39,10 @@ #ifdef BLIS_USE_HPX_BARRIER +#ifdef __cplusplus +extern "C" { +#endif + // Define the pthread_barrier_t implementations of the init, cleanup, and // barrier functions. @@ -59,8 +63,16 @@ void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm ) comm->barrier->arrive_and_wait(); } +#ifdef __cplusplus +} +#endif + #else +#ifdef __cplusplus +extern "C" { +#endif + // Define the non-pthread_barrier_t implementations of the init, cleanup, // and barrier functions. These are the default unless the pthread_barrier_t // versions are requested at compile-time. @@ -83,6 +95,10 @@ void bli_thrcomm_barrier_hpx( dim_t t_id, thrcomm_t* comm ) bli_thrcomm_barrier_atomic( t_id, comm ); } +#ifdef __cplusplus +} +#endif + #endif #endif diff --git a/frame/thread/bli_thrcomm_hpx.hpp b/frame/thread/bli_thrcomm_hpx_impl.hpp similarity index 97% rename from frame/thread/bli_thrcomm_hpx.hpp rename to frame/thread/bli_thrcomm_hpx_impl.hpp index 9071249609..36e3ab49ad 100644 --- a/frame/thread/bli_thrcomm_hpx.hpp +++ b/frame/thread/bli_thrcomm_hpx_impl.hpp @@ -32,8 +32,8 @@ */ -#ifndef BLIS_THRCOMM_HPX_H -#define BLIS_THRCOMM_HPX_H +#ifndef BLIS_THRCOMM_HPX_IMPL_H +#define BLIS_THRCOMM_HPX_IMPL_H // Define these prototypes for situations when POSIX multithreading is enabled. #ifdef BLIS_ENABLE_HPX diff --git a/frame/thread/bli_thread.c b/frame/thread/bli_thread.c index 0b0daad98c..4a109138f9 100644 --- a/frame/thread/bli_thread.c +++ b/frame/thread/bli_thread.c @@ -36,7 +36,7 @@ #include "blis.h" #ifdef BLIS_ENABLE_HPX -#include "bli_thread_hpx.hpp" +#include "bli_thread_hpx.h" #endif thrcomm_t BLIS_SINGLE_COMM = {}; @@ -75,9 +75,9 @@ static thread_launch_t thread_launch_fpa[ BLIS_NUM_THREAD_IMPLS ] = NULL, #endif [BLIS_HPX] = -#if defined(BLIS_ENABLE_OPENMP) +#if defined(BLIS_ENABLE_HPX) bli_thread_launch_hpx, -#elif defined(BLIS_ENABLE_PTHREADS) +#elif defined(BLIS_ENABLE_OPENMP) NULL, #else NULL, diff --git a/frame/thread/bli_thread_hpx_impl.cpp b/frame/thread/bli_thread_hpx_impl.cpp index f7a0194da6..f1f4a36888 100644 --- a/frame/thread/bli_thread_hpx_impl.cpp +++ b/frame/thread/bli_thread_hpx_impl.cpp @@ -51,7 +51,7 @@ typedef struct thread_data // Entry point for additional threads static void* bli_hpx_thread_entry( void* data_void ) { - const thread_data_t* data = data_void; + const thread_data_t* data = static_cast(data_void); const dim_t tid = data->tid; thrcomm_t* gl_comm = data->gl_comm; @@ -85,16 +85,15 @@ void bli_thread_launch_hpx( dim_t n_threads, thread_func_t func, const void* par #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_l3_thread_decorator().pth: " ); #endif - bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads, &r_val ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_l3_thread_decorator().pth: " ); #endif - thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val ); + thread_data_t* datas = static_cast(bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val )); // NOTE: We must iterate backwards so that the chief thread (thread id 0) // can spawn all other threads before proceeding with its own computation. - auto irange = hpx::util::detail::make_counting_shape(num_seg); + auto irange = hpx::util::detail::make_counting_shape(n_threads); hpx::for_each(hpx::execution::par, hpx::util::begin(irange), hpx::util::end(irange), [&datas, &gl_comm, &func, ¶ms](const dim_t tid) { @@ -115,7 +114,6 @@ void bli_thread_launch_hpx( dim_t n_threads, thread_func_t func, const void* par #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_l3_thread_decorator().pth: " ); #endif - bli_free_intl( pthreads ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_l3_thread_decorator().pth: " ); From 55d41cd31d0668cfca1397022e233b5743166eb6 Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 14:54:21 -0500 Subject: [PATCH 04/21] initial import --- frame/thread/bli_thrcomm_hpx.h | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 frame/thread/bli_thrcomm_hpx.h diff --git a/frame/thread/bli_thrcomm_hpx.h b/frame/thread/bli_thrcomm_hpx.h new file mode 100644 index 0000000000..a39e1cbce6 --- /dev/null +++ b/frame/thread/bli_thrcomm_hpx.h @@ -0,0 +1,46 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_THRCOMM_HPX_H +#define BLIS_THRCOMM_HPX_H + +// Define these prototypes for situations when POSIX multithreading is enabled. +#ifdef BLIS_ENABLE_HPX + +#include "bli_thrcomm_hpx_impl.hpp" + +#endif + +#endif + From 6e37e9fc15faccb20107d27e2e72bd5ae6b9297d Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 15:10:32 -0500 Subject: [PATCH 05/21] initial import --- common.mk | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/common.mk b/common.mk index 0da4817e2b..2b7d18b03f 100644 --- a/common.mk +++ b/common.mk @@ -842,11 +842,11 @@ endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) ENABLE_HPX := yes ifneq ($(findstring yes,$(ENABLE_DEBUG)),) -CXXTHREADFLAGS += `pkg-config --cflags hpx_application` -LDFLAGS += `pkg-config --libs hpx_application` +CXXTHREADFLAGS += `pkg-config --cflags hpx_component` +LDFLAGS += `pkg-config --libs hpx_component` else -CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` -LDFLAGS += `pkg-config --libs hpx_application_debug` +CXXTHREADFLAGS += `pkg-config --cflags hpx_component_debug` +LDFLAGS += `pkg-config --libs hpx_component_debug` endif endif endif @@ -866,11 +866,11 @@ endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) ENABLE_HPX := yes ifneq ($(findstring yes,$(ENABLE_DEBUG)),) -CXXTHREADFLAGS += `pkg-config --cflags hpx_application` -LDFLAGS += `pkg-config --libs hpx_application` +CXXTHREADFLAGS += `pkg-config --cflags hpx_component` +LDFLAGS += `pkg-config --libs hpx_component` else -CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` -LDFLAGS += `pkg-config --libs hpx_application_debug` +CXXTHREADFLAGS += `pkg-config --cflags hpx_component_debug` +LDFLAGS += `pkg-config --libs hpx_component_debug` endif endif endif @@ -890,11 +890,11 @@ endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) ENABLE_HPX := yes ifneq ($(findstring yes,$(ENABLE_DEBUG)),) -CXXTHREADFLAGS += `pkg-config --cflags hpx_application` -LDFLAGS += `pkg-config --libs hpx_application` +CXXTHREADFLAGS += `pkg-config --cflags hpx_component` +LDFLAGS += `pkg-config --libs hpx_component` else -CXXTHREADFLAGS += `pkg-config --cflags hpx_application_debug` -LDFLAGS += `pkg-config --libs hpx_application_debug` +CXXTHREADFLAGS += `pkg-config --cflags hpx_component_debug` +LDFLAGS += `pkg-config --libs hpx_component_debug` endif endif endif From e871ea714ec203ba9fed64131282456f7a6507f9 Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 15:51:24 -0500 Subject: [PATCH 06/21] initial import --- examples/hpx/00obj_basic.cpp | 252 +++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 examples/hpx/00obj_basic.cpp diff --git a/examples/hpx/00obj_basic.cpp b/examples/hpx/00obj_basic.cpp new file mode 100644 index 0000000000..4c06ee5853 --- /dev/null +++ b/examples/hpx/00obj_basic.cpp @@ -0,0 +1,252 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + obj_t a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11; + obj_t v1, v2; + num_t dt; + dim_t m, n; + inc_t rs, cs; + + + // + // This file demonstrates the basics of creating objects in BLIS, + // inspecting their basic properties, and printing matrix objects. + // + + + // + // Example 1: Create an object containing a 4x3 matrix of double- + // precision real elements stored in column-major order. + // + + // The matrix dimensions are m = 4 and n = 3. We choose to use column + // storage (often called column-major storage) and thus we specify + // that the row stride ("rs" for short) argument is 1 and the column + // stride ("cs" for short) argument is equal to m = 4. In column + // storage, cs is known as the leading dimension. + dt = BLIS_DOUBLE; m = 4; n = 3; + rs = 1; cs = 4; + bli_obj_create( dt, m, n, rs, cs, &a1 ); + + // If cs is greater than m, then extra rows (in this case, two) will + // be allocated beyond the lower edge of the matrix. Sometimes this + // is desireable for alignment purposes. + dt = BLIS_DOUBLE; m = 4; n = 3; + rs = 1; cs = 6; + bli_obj_create( dt, m, n, rs, cs, &a2 ); + + + // + // Example 2: Create an object containing a 4x3 matrix of double- + // precision real elements stored in row-major order. + // + + // Here, we choose to use row storage (often called row-major storage) + // and thus we specify that the cs is 1 and rs is equal to n = 3. In + // row storage, the leading dimension corresponds to rs. + dt = BLIS_DOUBLE; m = 4; n = 3; + rs = 3; cs = 1; + bli_obj_create( dt, m, n, rs, cs, &a3 ); + + // As with the second example, we can cause extra columns (in this + // case, five) to be allocated beyond the right edge of the matrix. + dt = BLIS_DOUBLE; m = 4; n = 3; + rs = 8; cs = 1; + bli_obj_create( dt, m, n, rs, cs, &a4 ); + + + // + // Example 3: Create objects using other floating-point datatypes. + // + + // Examples of using the other floating-point datatypes. + m = 4; n = 3; + rs = 1; cs = 4; + bli_obj_create( BLIS_FLOAT, m, n, rs, cs, &a5 ); + bli_obj_create( BLIS_SCOMPLEX, m, n, rs, cs, &a6 ); + bli_obj_create( BLIS_DCOMPLEX, m, n, rs, cs, &a7 ); + + + // + // Example 4: Create objects using default (column) storage so that + // we avoid having to specify rs and cs manually. + // + + // Specifying the row and column strides as zero, as is done here, is + // a shorthand request for the default storage scheme, which is + // currently (and always has been) column storage. When requesting the + // default storage scheme with rs = cs = 0, BLIS may insert additional + // padding for alignment purposes. So, the 3x8 matrix object created + // below may end up having a row stride that is greater than 3. When + // in doubt, query the value! + bli_obj_create( BLIS_FLOAT, 3, 5, 0, 0, &a8 ); + + + // + // Example 5: Inspect object fields after creation to expose + // possible alignment/padding. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // Let's inspect the amount of padding inserted for alignment. Note + // the difference between the m dimension and the column stride. + printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a8 ) ) ); + printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a8 ) ) ); + printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a8 ) ); + printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a8 ) ); + printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a8 ) ); + printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a8 ) ); + + + // + // Example 6: Inspect object fields after creation of other floating- + // point datatypes. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + bli_obj_create( BLIS_DOUBLE, 3, 5, 0, 0, &a9 ); + bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10); + bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 ); + + printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a9 ) ) ); + printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a9 ) ) ); + printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a9 ) ); + printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a9 ) ); + printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a9 ) ); + printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a9 ) ); + + printf( "\n" ); + printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a10 ) ) ); + printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a10 ) ) ); + printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a10 ) ); + printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a10 ) ); + printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a10 ) ); + printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a10 ) ); + + printf( "\n" ); + printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a11 ) ) ); + printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a11 ) ) ); + printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a11 ) ); + printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a11 ) ); + printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a11 ) ); + printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a11 ) ); + + + // + // Example 7: Initialize an object's elements to random values and then + // print the matrix. + // + + printf( "\n#\n# -- Example 7 --\n#\n\n" ); + + // We can set matrices to random values. The default behavior of + // bli_randm() is to use random values on the internval [-1,1]. + bli_randm( &a9 ); + + // And we can also print the matrices associated with matrix objects. + // Notice that the third argument is a printf()-style format specifier. + // Any valid printf() format specifier can be passed in here, but you + // still need to make sure that the specifier makes sense for the data + // being printed. For example, you shouldn't use "%d" when printing + // elements of type 'float'. + bli_printm( "matrix 'a9' contents:", &a9, "%4.1f", "" ); + + + // + // Example 8: Randomize and then print from an object containing a complex + // matrix. + // + + printf( "\n#\n# -- Example 8 --\n#\n\n" ); + + // When printing complex matrices, the same format specifier gets used + // for both the real and imaginary parts. + bli_randm( &a11 ); + bli_printm( "matrix 'a11' contents (complex):", &a11, "%4.1f", "" ); + + + // + // Example 9: Create, randomize, and print vector objects. + // + + printf( "\n#\n# -- Example 9 --\n#\n\n" ); + + // Now let's create two vector objects--a row vector and a column vector. + // (A vector object is like a matrix object, except that it has at least + // one unit dimension (equal to one). + bli_obj_create( BLIS_DOUBLE, 4, 1, 0, 0, &v1 ); + bli_obj_create( BLIS_DOUBLE, 1, 6, 0, 0, &v2 ); + + // If we know the object is a vector, we can use bli_randv(), though + // bli_randm() would work just as well, since any vector is also a matrix. + bli_randv( &v1 ); + bli_randv( &v2 ); + + // We can print vectors, too. + bli_printm( "vector 'v1' contents:", &v1, "%5.1f", "" ); + bli_printm( "vector 'v2' contents:", &v2, "%5.1f", "" ); + + + // Free all of the objects we created. + bli_obj_free( &a1 ); + bli_obj_free( &a2 ); + bli_obj_free( &a3 ); + bli_obj_free( &a4 ); + bli_obj_free( &a5 ); + bli_obj_free( &a6 ); + bli_obj_free( &a7 ); + bli_obj_free( &a8 ); + bli_obj_free( &a9 ); + bli_obj_free( &a10 ); + bli_obj_free( &a11 ); + bli_obj_free( &v1 ); + bli_obj_free( &v2 ); + + return hpx::finalize(); +} + +int main(int argc, char ** argv){ + return hpx::init(argc, argv); +} + From e275161297db4092348b07a74b51adec3061e61f Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 16:28:27 -0500 Subject: [PATCH 07/21] initial import --- examples/hpx/{ => oapi}/00obj_basic.cpp | 0 examples/hpx/oapi/01obj_attach.cpp | 182 +++++++++++++ examples/hpx/oapi/02obj_ij.cpp | 276 +++++++++++++++++++ examples/hpx/oapi/03obj_view.cpp | 285 ++++++++++++++++++++ examples/hpx/oapi/04level0.cpp | 188 +++++++++++++ examples/hpx/oapi/05level1v.cpp | 189 +++++++++++++ examples/hpx/oapi/06level1m.cpp | 235 ++++++++++++++++ examples/hpx/oapi/07level1m_diag.cpp | 337 +++++++++++++++++++++++ examples/hpx/oapi/08level2.cpp | 333 +++++++++++++++++++++++ examples/hpx/oapi/09level3.cpp | 338 ++++++++++++++++++++++++ examples/hpx/oapi/Makefile | 16 ++ 11 files changed, 2379 insertions(+) rename examples/hpx/{ => oapi}/00obj_basic.cpp (100%) create mode 100644 examples/hpx/oapi/01obj_attach.cpp create mode 100644 examples/hpx/oapi/02obj_ij.cpp create mode 100644 examples/hpx/oapi/03obj_view.cpp create mode 100644 examples/hpx/oapi/04level0.cpp create mode 100644 examples/hpx/oapi/05level1v.cpp create mode 100644 examples/hpx/oapi/06level1m.cpp create mode 100644 examples/hpx/oapi/07level1m_diag.cpp create mode 100644 examples/hpx/oapi/08level2.cpp create mode 100644 examples/hpx/oapi/09level3.cpp create mode 100644 examples/hpx/oapi/Makefile diff --git a/examples/hpx/00obj_basic.cpp b/examples/hpx/oapi/00obj_basic.cpp similarity index 100% rename from examples/hpx/00obj_basic.cpp rename to examples/hpx/oapi/00obj_basic.cpp diff --git a/examples/hpx/oapi/01obj_attach.cpp b/examples/hpx/oapi/01obj_attach.cpp new file mode 100644 index 0000000000..22a9c0113c --- /dev/null +++ b/examples/hpx/oapi/01obj_attach.cpp @@ -0,0 +1,182 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include +#include "blis.h" + +#include +#include + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); + +int hpx_main( int argc, char** argv ) +{ + obj_t a1, a2; + num_t dt; + dim_t m, n; + inc_t rs, cs; + + + // + // This file demonstrates interfacing external or existing buffers + // with BLIS objects. + // + + + // + // Example 1: Create a bufferless object and then attach an external + // buffer to it, specifying column storage. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // We'll use these parameters for the following examples. + dt = BLIS_DOUBLE; + m = 4; n = 5; rs = 1; cs = m; + + // First we allocate and initialize a matrix by columns. + double* p1 = static_cast(malloc( m * n * sizeof( double ) )); + init_dmatrix_by_cols( m, n, p1, rs, cs ); + + // bli_obj_create() automatically allocates an array large enough to hold + // of the elements. We can also create a "bufferless" object and then + // "attach" our own buffer to that object. This is useful when interfacing + // BLIS objects to an existing application that produces its own matrix + // arrays/buffers. + bli_obj_create_without_buffer( dt, m, n, &a1 ); + + // Note that the fourth argument of bli_obj_attach_buffer() is the so-called + // "imaginary stride". First of all, this stride only has meaning in the + // complex domain. Secondly, it is a somewhat experimental property of the + // obj_t, and one that is not fully recognized/utilized throughout BLIS. + // Thus, the safe thing to do is to always pass in a 0, which is a request + // for the default (which is actually 1). Please don't use any other value + // unless you really know what you are doing. + bli_obj_attach_buffer( p1, rs, cs, 0, &a1 ); + + // Now let's print the matrix so we can see how the element values were + // assigned. + bli_printm( "matrix 'a1', initialized by columns:", &a1, "%5.1f", "" ); + + + // + // Example 2: Create a bufferless object and then attach an external + // buffer to it, specifying row storage. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Now let's allocate another buffer, but this time we'll initialize it by + // rows instead of by columns. We'll use the same values for m, n, rs, cs. + double* p2 = static_cast(malloc( m * n * sizeof( double ) )); + init_dmatrix_by_rows( m, n, p2, rs, cs ); + + // Create a new bufferless object and attach the new buffer. This time, + // instead of calling bli_obj_create_without_buffer() followed by + // bli_obj_attach_buffer(), we call bli_obj_create_with_attached_buffer(), + // which is just a convenience wrapper around the former two functions. + // (Note that the wrapper function omits the imaginary stride argument.) +#if 1 + bli_obj_create_with_attached_buffer( dt, m, n, p2, rs, cs, &a2 ); +#else + bli_obj_create_without_buffer( dt, m, n, &a2 ); + bli_obj_attach_buffer( p2, rs, cs, 0, &a2 ); +#endif + + // Print the matrix so we can compare it to the first matrix output. + bli_printm( "matrix 'a2', initialized by rows:", &a2, "%5.1f", "" ); + + // Please note that after creating an object via either of: + // - bli_obj_create_without_buffer(), or + // - bli_obj_create_with_attached_buffer() + // we do NOT free it! That's because these functions merely initialize the + // object and do not actually allocate any memory. + + + // Free the memory arrays we allocated. + free( p1 ); + free( p2 ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by rows, assigning each element a unique + // value, starting at 0. + for ( i = 0; i < m; ++i ) + { + for ( j = 0; j < n; ++j ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +int main(int argc, char** argv) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/02obj_ij.cpp b/examples/hpx/oapi/02obj_ij.cpp new file mode 100644 index 0000000000..96c9e411c0 --- /dev/null +++ b/examples/hpx/oapi/02obj_ij.cpp @@ -0,0 +1,276 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include +#include "blis.h" + +#include +#include + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dobj_by_cols( obj_t* a ); +void init_zobj_by_cols( obj_t* a ); + +int hpx_main( int argc, char** argv ) +{ + obj_t a1, a2, a3; + num_t dt; + dim_t m, n; + inc_t rs, cs; + dim_t i, j; + + + // + // This file demonstrates accessing and updating individual matrix elements + // through the BLIS object API. + // + + + // + // Example 1: Create an object and then individually access/view some of + // its elements. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // We'll use these parameters for the following examples. + dt = BLIS_DOUBLE; + m = 4; n = 5; rs = 1; cs = m; + + // Create a object with known elements using the same approach as the + // previous example file. + double* p1 = static_cast(malloc( m * n * sizeof( double ) )); + init_dmatrix_by_cols( m, n, p1, rs, cs ); + bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a1 ); + + bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); + + // Regardless of how we create our object--whether via bli_obj_create() or + // via attaching an existing buffer to a bufferless object--we can access + // individual elements by specifying their offsets. The output value is + // broken up by real and imaginary component. (When accessing real matrices, + // the imaginary component will always be zero.) + i = 1; j = 3; + double alpha_r, alpha_i; + bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); + + // Here, we print out the element "returned" by bli_getijm(). + printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); + + // Let's query a few more elements. + i = 0; j = 2; + bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); + + printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); + + i = 3; j = 4; + bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); + + printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); + + printf( "\n" ); + + + // + // Example 2: Modify individual elements of an existing matrix. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Now let's change a few elements. Even if we set the imaginary + // argument to a non-zero value, argument is ignored since we're + // modifying a real matrix. If a1 were a complex object, those + // values would be stored verbatim into the appropriate matrix + // elements (see example for a3 below). + alpha_r = -3.0; alpha_i = 0.0; i = 1; j = 3; + bli_setijm( alpha_r, alpha_i, i, j, &a1 ); + + alpha_r = -9.0; alpha_i = -1.0; i = 0; j = 2; + bli_setijm( alpha_r, alpha_i, i, j, &a1 ); + + alpha_r = -7.0; alpha_i = 2.0; i = 3; j = 4; + bli_setijm( alpha_r, alpha_i, i, j, &a1 ); + + // Print the matrix again so we can see the update elements. + bli_printm( "matrix 'a1' (modified state)", &a1, "%5.1f", "" ); + + // Next, let's create a regular object (with a buffer) and then + // initialize its elements using bli_setijm(). + bli_obj_create( dt, m, n, rs, cs, &a2 ); + + // See definition of init_dobj_by_cols() below. + init_dobj_by_cols( &a2 ); + + // Because we initialized a2 in the same manner as a1 (by columns), + // it should contain the same initial state as a1. + bli_printm( "matrix 'a2'", &a2, "%5.1f", "" ); + + + // + // Example 3: Modify individual elements of an existing complex matrix. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Create and initialize a complex object. + dt = BLIS_DCOMPLEX; + bli_obj_create( dt, m, n, rs, cs, &a3 ); + + // Initialize the matrix elements. (See definition of init_dobj_by_cols() + // below). + init_zobj_by_cols( &a3 ); + + // Print the complex matrix. + bli_printm( "matrix 'a3' (initial state)", &a3, "%5.1f", "" ); + + i = 3; j = 0; + bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); + alpha_r *= -1.0; alpha_i *= -1.0; + bli_setijm( alpha_r, alpha_i, i, j, &a3 ); + + i = 3; j = 4; + bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); + alpha_r *= -1.0; alpha_i *= -1.0; + bli_setijm( alpha_r, alpha_i, i, j, &a3 ); + + i = 0; j = 4; + bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); + alpha_r *= -1.0; alpha_i *= -1.0; + bli_setijm( alpha_r, alpha_i, i, j, &a3 ); + + // Print the matrix again so we can see the update elements. + bli_printm( "matrix 'a3' (modified state)", &a3, "%5.1f", "" ); + + // Free the memory arrays we allocated. + free( p1 ); + + + // Free the objects we created. + bli_obj_free( &a2 ); + bli_obj_free( &a3 ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by rows, assigning each element a unique + // value, starting at 0. + for ( i = 0; i < m; ++i ) + { + for ( j = 0; j < n; ++j ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, 0.0, i, j, a ); + + alpha += 1.0; + } + } +} + +void init_zobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each real and imaginary + // element a unique value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, alpha + 1.0, i, j, a ); + + alpha += 2.0; + } + } +} + +int main(int argc, char ** argv) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/03obj_view.cpp b/examples/hpx/oapi/03obj_view.cpp new file mode 100644 index 0000000000..a751cb82bf --- /dev/null +++ b/examples/hpx/oapi/03obj_view.cpp @@ -0,0 +1,285 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include +#include "blis.h" + +#include +#include + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dobj_by_cols( obj_t* a ); +void init_zobj_by_cols( obj_t* a ); + +int hpx_main( int argc, char** argv ) +{ + obj_t a1, a2; + obj_t v1, v2, v3, v4, v5; + num_t dt; + dim_t m, n; + inc_t rs, cs; + dim_t i, j; + dim_t mv, nv; + + + // + // This file demonstrates creating and submatrix views into existing matrices. + // + + + // + // Example 1: Create an object and then create a submatrix view. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // We'll use these parameters for the following examples. + dt = BLIS_DOUBLE; + m = 6; n = 7; rs = 1; cs = m; + + // Create an object a1 using bli_obj_create(). + bli_obj_create( dt, m, n, rs, cs, &a1 ); + + // Initialize a1 to contain known values. + init_dobj_by_cols( &a1 ); + + bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); + + // Acquire a 4x3 submatrix view into a1 at (i,j) offsets (1,2). + i = 1; j = 2; mv = 4; nv = 3; + bli_acquire_mpart( i, j, mv, nv, &a1, &v1 ); + + bli_printm( "4x3 submatrix 'v1' at offsets (1,2)", &v1, "%5.1f", "" ); + + // NOTE: Submatrix views should never be passed to bli_obj_free(). It + // will not cause an immediate error, but it is bad practice. Instead, + // you should only release the objects that were created directy via + // bli_obj_create(). In the above example, that means only object a1 + // would be passed to bli_obj_free(). + + + // + // Example 2: Modify the contents of a submatrix view. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Modify the first three elements of the first column. + bli_setijm( -3.0, 0.0, 0, 0, &v1 ); + bli_setijm( -4.0, 0.0, 1, 0, &v1 ); + bli_setijm( -5.0, 0.0, 2, 0, &v1 ); + + // Modify the first three elements of the second column. + bli_setijm( -6.0, 0.0, 0, 1, &v1 ); + bli_setijm( -7.0, 0.0, 1, 1, &v1 ); + bli_setijm( -8.0, 0.0, 2, 1, &v1 ); + + // Print the matrix again so we can see the update elements. + bli_printm( "submatrix view 'v1' (modified state)", &v1, "%5.1f", "" ); + bli_printm( "matrix 'a1' (indirectly modified due to changes to 'v1')", &a1, "%5.1f", "" ); + + + // + // Example 3: Create a submatrix view that is "too big". + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // bli_acquire_mpart() will safely truncate your requested submatrix + // view dimensions (or even the offsets) if they extend beyond the + // bounds of the parent object. + + bli_printm( "matrix 'a1' (current state)", &a1, "%5.1f", "" ); + + // Acquire a 4x3 submatrix view into a1 at offsets (4,2). Notice how + // the requested view contains four rows, but the view is created with + // only two rows because the starting m offset of 4 leaves only two rows + // left in the parent matrix. + bli_acquire_mpart( 4, 2, 4, 3, &a1, &v2 ); + + bli_printm( "4x3 submatrix 'v2' at offsets (4,2) -- two rows truncated for safety", &v2, "%5.1f", "" ); + + + // + // Example 4: Create a bufferless object, attach an external buffer, and + // then create a submatrix view. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create a object with known elements using the same approach as the + // previous example file. + double* p1 = static_cast(malloc( m * n * sizeof( double ) )); + init_dmatrix_by_cols( m, n, p1, rs, cs ); + bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a2 ); + + bli_printm( "matrix 'a2' (initial state)", &a2, "%5.1f", "" ); + + // Acquire a 3x4 submatrix view at offset (2,3). + bli_acquire_mpart( 2, 3, 3, 4, &a2, &v3 ); + + bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); + + + // + // Example 5: Use a submatrix view to set a region of a larger matrix to + // zero. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); + + bli_setm( &BLIS_ZERO, &v3 ); + + bli_printm( "3x4 submatrix view 'v3' (zeroed out)", &v3, "%5.1f", "" ); + + bli_printm( "matrix 'a2' (modified state)", &a2, "%5.1f", "" ); + + + // + // Example 6: Obtain a submatrix view into a submatrix view. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + bli_acquire_mpart( 1, 1, 5, 6, &a2, &v4 ); + + bli_printm( "5x6 submatrix view 'v4' at offsets (1,1) of 'a2'", &v4, "%5.1f", "" ); + + bli_acquire_mpart( 1, 0, 4, 5, &v4, &v5 ); + + bli_printm( "4x5 submatrix view 'v5' at offsets (1,0) of 'v4'", &v5, "%5.1f", "" ); + + + // Free the memory arrays we allocated. + free( p1 ); + + // Free the objects we created. + bli_obj_free( &a1 ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by rows, assigning each element a unique + // value, starting at 0. + for ( i = 0; i < m; ++i ) + { + for ( j = 0; j < n; ++j ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, 0.0, i, j, a ); + + alpha += 1.0; + } + } +} + +void init_zobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each real and imaginary + // element a unique value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, alpha + 1.0, i, j, a ); + + alpha += 2.0; + } + } +} + +int main( int argc, char** argv ) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/04level0.cpp b/examples/hpx/oapi/04level0.cpp new file mode 100644 index 0000000000..e334b1a44f --- /dev/null +++ b/examples/hpx/oapi/04level0.cpp @@ -0,0 +1,188 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + obj_t alpha, beta, gamma, kappa, zeta; + num_t dt; + double gamma_d; + + + // + // This file demonstrates working with scalar objects. + // + + + // + // Example 1: Create a scalar (1x1) object. + // + + dt = BLIS_DOUBLE; + + // The easiest way to create a scalar object is with the following + // convenience function. + bli_obj_create_1x1( dt, &alpha ); + + // We could, of course, create an object using our more general-purpose + // function, using m = n = 1. + bli_obj_create( dt, 1, 1, 0, 0, &beta ); + + // We can even attach an external scalar. This function, unlike + // bli_obj_create_1x1() and bli_obj_create(), does not result in any + // memory allocation. + bli_obj_create_1x1_with_attached_buffer( dt, &gamma_d, &gamma ); + + // There is one more way to create an object. Like the previous method, + // it also avoids memory allocation by referencing a special "internal" + // scalar that is invisibly part of every object. + bli_obj_scalar_init_detached( dt, &kappa ); + + // Digression: In the most common cases, there is no need to create scalar + // objects to begin with. That's because BLIS comes with three ready-to-use + // globally-scoped scalar objects: + // + // obj_t BLIS_MINUS_ONE; + // obj_t BLIS_ZERO; + // obj_t BLIS ONE; + // + // Each of these special objects is provided by blis.h. They can be used + // wherever a scalar object is expected as an input operand regardless of + // the datatype of your other operands. Note that you should never try to + // modify these global scalar objects directly, nor should you ever try to + // perform an operation *on* the objects (that is, you should never try to + // update their values, though you can always perform operations *with* + // them--that's the whole point!). + + + // + // Example 2: Set the value of an existing scalar object. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Once you've created an object, you can set its value via setsc. As with + // setijm, setsc takes a real and imaginary value, but you can ignore the + // imaginary argument if your object is real. And even if you pass in a + // non-zero value, it is ignored for real objects. + bli_setsc( -4.0, 0.0, &alpha ); + bli_setsc( 3.0, 1.0, &beta ); + bli_setsc( 0.5, 0.0, &kappa ); + bli_setsc( 10.0, 0.0, &gamma ); + + // BLIS does not have a special print function for scalars, but since a + // 1x1 is also a vector and a matrix, we can use printv or printm. + bli_printm( "alpha:", &alpha, "%4.1f", "" ); + bli_printm( "beta:", &beta, "%4.1f", "" ); + bli_printm( "kappa:", &kappa, "%4.1f", "" ); + bli_printm( "gamma:", &gamma, "%4.1f", "" ); + + + // + // Example 3: Create and set the value of a complex scalar object. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Create one more scalar, this time a complex scalar, to show how it + // can be used. + bli_obj_create_1x1( BLIS_DCOMPLEX, &zeta ); + bli_setsc( 3.3, -4.4, &zeta ); + bli_printm( "zeta (complex):", &zeta, "%4.1f", "" ); + + + // + // Example 4: Copy scalar objects. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // We can copy scalars amongst one another, and we can use the global + // scalar constants for input operands. + bli_copysc( &beta, &gamma ); + bli_printm( "gamma (overwritten with beta):", &gamma, "%4.1f", "" ); + + bli_copysc( &BLIS_ONE, &gamma ); + bli_printm( "gamma (overwritten with BLIS_ONE):", &gamma, "%4.1f", "" ); + + + // + // Example 5: Perform other operations on scalar objects. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // BLIS defines a range of basic floating-point operations on scalars. + bli_addsc( &beta, &gamma ); + bli_printm( "gamma := gamma + beta", &gamma, "%4.1f", "" ); + + bli_subsc( &alpha, &gamma ); + bli_printm( "gamma := gamma - alpha", &gamma, "%4.1f", "" ); + + bli_divsc( &kappa, &gamma ); + bli_printm( "gamma := gamma / kappa", &gamma, "%4.1f", "" ); + + bli_sqrtsc( &gamma, &gamma ); + bli_printm( "gamma := sqrt( gamma )", &gamma, "%4.1f", "" ); + + bli_normfsc( &alpha, &alpha ); + bli_printm( "alpha := normf( alpha ) # normf() = abs() in real domain.", &alpha, "%4.1f", "" ); + + // Note that normfsc() allows complex input objects, but requires that the + // output operand (the second operand) be a real object. + bli_normfsc( &zeta, &alpha ); + bli_printm( "alpha := normf( zeta ) # normf() = complex modulus in complex domain.", &alpha, "%4.1f", "" ); + + bli_invertsc( &gamma ); + bli_printm( "gamma := 1.0 / gamma", &gamma, "%4.2f", "" ); + + + // Only free the objects that resulted in actual allocation. + bli_obj_free( &alpha ); + bli_obj_free( &beta ); + bli_obj_free( &zeta ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- + +int main( int argc, char** argv ) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/05level1v.cpp b/examples/hpx/oapi/05level1v.cpp new file mode 100644 index 0000000000..4fcb0ed45e --- /dev/null +++ b/examples/hpx/oapi/05level1v.cpp @@ -0,0 +1,189 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + obj_t alpha, beta, gamma; + obj_t x, y, z, w, a; + num_t dt; + dim_t m, n; + inc_t rs, cs; + + + // + // This file demonstrates working with vector objects and the level-1v + // operations. + // + + + // + // Example 1: Create vector objects and then broadcast (copy) scalar + // values to all elements. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create a few vectors to work with. We make them all of the same length + // so that we can perform operations between them. + // NOTE: We've chosen to use row vectors here (1x4) instead of column + // vectors (4x1) to allow for easier reading of standard output (less + // scrolling). + dt = BLIS_DOUBLE; + m = 1; n = 4; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &x ); + bli_obj_create( dt, m, n, rs, cs, &y ); + bli_obj_create( dt, m, n, rs, cs, &z ); + bli_obj_create( dt, m, n, rs, cs, &w ); + bli_obj_create( dt, m, n, rs, cs, &a ); + + // Let's also create and initialize some scalar objects. + bli_obj_create_1x1( dt, &alpha ); + bli_obj_create_1x1( dt, &beta ); + bli_obj_create_1x1( dt, &gamma ); + + bli_setsc( 2.0, 0.0, &alpha ); + bli_setsc( 0.2, 0.0, &beta ); + bli_setsc( 3.0, 0.0, &gamma ); + + bli_printm( "alpha:", &alpha, "%4.1f", "" ); + bli_printm( "beta:", &beta, "%4.1f", "" ); + bli_printm( "gamma:", &gamma, "%4.1f", "" ); + + // Vectors can set by "broadcasting" a constant to every element. + bli_setv( &BLIS_ONE, &x ); + bli_setv( &alpha, &y ); + bli_setv( &BLIS_ZERO, &z ); + + // Note that we can use printv or printm to print vectors since vectors + // are also matrices. We choose to use printm because it honors the + // orientation of the vector (row or column) when printing, whereas + // printv always prints vectors as column vectors regardless of their + // they are 1 x n or n x 1. + bli_printm( "x := 1.0", &x, "%4.1f", "" ); + bli_printm( "y := alpha", &y, "%4.1f", "" ); + bli_printm( "z := 0.0", &z, "%4.1f", "" ); + + + // + // Example 2: Randomize a vector object. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Set a vector to random values. + bli_randv( &w ); + + bli_printm( "w := randv()", &w, "%4.1f", "" ); + + + // + // Example 3: Perform various element-wise operations on vector objects. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Copy a vector. + bli_copyv( &w, &a ); + bli_printm( "a := w", &a, "%4.1f", "" ); + + // Add and subtract vectors. + bli_addv( &y, &a ); + bli_printm( "a := a + y", &a, "%4.1f", "" ); + + bli_subv( &w, &a ); + bli_printm( "a := a - w", &a, "%4.1f", "" ); + + // Scale a vector (destructive). + bli_scalv( &beta, &a ); + bli_printm( "a := beta * a", &a, "%4.1f", "" ); + + // Scale a vector (non-destructive). + bli_scal2v( &gamma, &a, &z ); + bli_printm( "z := gamma * a", &z, "%4.1f", "" ); + + // Scale and accumulate between vectors. + bli_axpyv( &alpha, &w, &x ); + bli_printm( "x := x + alpha * w", &x, "%4.1f", "" ); + + bli_xpbyv( &w, &BLIS_MINUS_ONE, &x ); + bli_printm( "x := -1.0 * x + w", &x, "%4.1f", "" ); + + // Invert a vector element-wise. + bli_invertv( &y ); + bli_printm( "y := 1 / y", &y, "%4.1f", "" ); + + // Swap two vectors. + bli_swapv( &x, &y ); + bli_printm( "x (after swapping with y)", &x, "%4.1f", "" ); + bli_printm( "y (after swapping with x)", &y, "%4.1f", "" ); + + + // + // Example 4: Perform contraction-like operations on vector objects. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Perform a dot product. + bli_dotv( &a, &z, &gamma ); + bli_printm( "gamma := a * z (dot product)", &gamma, "%5.2f", "" ); + + // Perform an extended dot product. + bli_dotxv( &alpha, &a, &z, &BLIS_ONE, &gamma ); + bli_printm( "gamma := 1.0 * gamma + alpha * a * z (accumulate scaled dot product)", &gamma, "%5.2f", "" ); + + + // Free the objects. + bli_obj_free( &alpha ); + bli_obj_free( &beta ); + bli_obj_free( &gamma ); + bli_obj_free( &x ); + bli_obj_free( &y ); + bli_obj_free( &z ); + bli_obj_free( &w ); + bli_obj_free( &a ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- +int main( int argc, char** argv ) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/06level1m.cpp b/examples/hpx/oapi/06level1m.cpp new file mode 100644 index 0000000000..039aa1ef2f --- /dev/null +++ b/examples/hpx/oapi/06level1m.cpp @@ -0,0 +1,235 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + obj_t alpha, beta, gamma; + obj_t a, b, c, d, e, f, g, h; + num_t dt; + dim_t m, n; + inc_t rs, cs; + + + // + // This file demonstrates working with matrix objects and the level-1m + // operations. + // + + + // + // Example 1: Create matrix objects and then broadcast (copy) scalar + // values to all elements. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create a few matrices to work with. We make them all of the same + // dimensions so that we can perform operations between them. + dt = BLIS_DOUBLE; + m = 2; n = 3; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &a ); + bli_obj_create( dt, m, n, rs, cs, &b ); + bli_obj_create( dt, m, n, rs, cs, &c ); + bli_obj_create( dt, m, n, rs, cs, &d ); + bli_obj_create( dt, m, n, rs, cs, &e ); + + // Let's also create and initialize some scalar objects. + bli_obj_create_1x1( dt, &alpha ); + bli_obj_create_1x1( dt, &beta ); + bli_obj_create_1x1( dt, &gamma ); + + bli_setsc( 2.0, 0.0, &alpha ); + bli_setsc( 0.2, 0.0, &beta ); + bli_setsc( 3.0, 0.0, &gamma ); + + bli_printm( "alpha:", &alpha, "%4.1f", "" ); + bli_printm( "beta:", &beta, "%4.1f", "" ); + bli_printm( "gamma:", &gamma, "%4.1f", "" ); + + // Matrices, like vectors, can set by "broadcasting" a constant to every + // element. + bli_setm( &BLIS_ONE, &a ); + bli_setm( &alpha, &b ); + bli_setm( &BLIS_ZERO, &c ); + + bli_printm( "a := 1.0", &a, "%4.1f", "" ); + bli_printm( "b := alpha", &b, "%4.1f", "" ); + bli_printm( "c := 0.0", &c, "%4.1f", "" ); + + + // + // Example 2: Randomize a matrix object. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Set a matrix to random values. + bli_randm( &e ); + + bli_printm( "e (randomized):", &e, "%4.1f", "" ); + + + // + // Example 3: Perform element-wise operations on matrices. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Copy a matrix. + bli_copym( &e, &d ); + bli_printm( "d := e", &d, "%4.1f", "" ); + + // Add and subtract vectors. + bli_addm( &a, &d ); + bli_printm( "d := d + a", &d, "%4.1f", "" ); + + bli_subm( &a, &e ); + bli_printm( "e := e - a", &e, "%4.1f", "" ); + + // Scale a matrix (destructive). + bli_scalm( &alpha, &e ); + bli_printm( "e := alpha * e", &e, "%4.1f", "" ); + + // Scale a matrix (non-destructive). + bli_scal2m( &beta, &e, &c ); + bli_printm( "c := beta * e", &c, "%4.1f", "" ); + + // Scale and accumulate between matrices. + bli_axpym( &alpha, &a, &c ); + bli_printm( "c := c + alpha * a", &c, "%4.1f", "" ); + + + // + // Example 4: Copy and transpose a matrix. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create an n-by-m matrix into which we can copy-transpose an m-by-n + // matrix. + bli_obj_create( dt, n, m, rs, cs, &f ); + + // Initialize all of 'f' to -1.0 to simulate junk values. + bli_setm( &BLIS_MINUS_ONE, &f ); + + bli_printm( "e:", &e, "%4.1f", "" ); + bli_printm( "f (initial value):", &f, "%4.1f", "" ); + + // Since we are going to copy 'e' to 'f', we need to indicate a transpose + // on 'e', the input operand. Transposition can be indicated by setting a + // bit in the object. Since it always starts out as "no transpose", we can + // simply toggle the bit. + bli_obj_toggle_trans( &e ); + + // Another way to mark and object for transposition is to set it directly. + //bli_obj_set_onlytrans( BLIS_TRANSPOSE, &e ); + + // A third way is to "apply" a transposition. This is equivalent to toggling + // the transposition when the value being applied is BLIS_TRANSPOSE. If + // the value applied is BLIS_NO_TRANSPOSE, the transposition bit in the + // targeted object is unaffected. (Applying transposes is more useful in + // practice when the 'trans' argument is a variable and not a constant + // literal.) + //bli_obj_apply_trans( BLIS_TRANSPOSE, &e ); + //bli_obj_apply_trans( BLIS_NO_TRANSPOSE, &e ); + //bli_obj_apply_trans( trans, &e ); + + // Copy 'e' to 'f', transposing 'e' in the process. Notice that we haven't + // modified any properties of 'd'. It's the source operand that matters + // when marking an operand for transposition, not the destination. + bli_copym( &e, &f ); + + bli_printm( "f (copied value):", &f, "%4.1f", "" ); + + + // + // Example 5: Copy and Hermitian-transpose a matrix. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // Create an n-by-m complex matrix into which we can Hermitian-transpose + // (or, conjugate-transpose) another complex (m-by-n) matrix. + dt = BLIS_DCOMPLEX; + bli_obj_create( dt, m, n, rs, cs, &g ); + bli_obj_create( dt, n, m, rs, cs, &h ); + + // Randomize 'g', the input operand. + bli_randm( &g ); + + // Initialize all of 'h' to -1.0 to simulate junk values. + bli_setm( &BLIS_MINUS_ONE, &h ); + + bli_printm( "g:", &g, "%4.1f", "" ); + bli_printm( "h (initial value):", &h, "%4.1f", "" ); + + // Set both the transpose and conjugation bits. + bli_obj_toggle_trans( &g ); + bli_obj_toggle_conj( &g ); + + // Copy 'g' to 'h', conjugating and transposing 'g' in the process. + // Once again, notice that it's the source operand that we've marked for + // conjugation. + bli_copym( &g, &h ); + + bli_printm( "h (copied value):", &h, "%4.1f", "" ); + + + // Free the objects. + bli_obj_free( &alpha ); + bli_obj_free( &beta ); + bli_obj_free( &gamma ); + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + bli_obj_free( &d ); + bli_obj_free( &e ); + bli_obj_free( &f ); + bli_obj_free( &g ); + bli_obj_free( &h ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- + +int main( int argc, char** argv ) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/07level1m_diag.cpp b/examples/hpx/oapi/07level1m_diag.cpp new file mode 100644 index 0000000000..ec656c58b4 --- /dev/null +++ b/examples/hpx/oapi/07level1m_diag.cpp @@ -0,0 +1,337 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + num_t dt; + dim_t m, n; + inc_t rs, cs; + + + // + // This file demonstrates level-1m operations on structured matrices. + // + + + // + // Example 1: Initialize the upper triangle of a matrix to random values. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + obj_t a; + + // Create a matrix to work with. + dt = BLIS_DOUBLE; + m = 5; n = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &a ); + + // First, we mark the matrix structure as triangular. + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); + + // Next, we specify whether the lower part or the upper part is to be + // recognized as the "stored" region (which we call the uplo field). The + // strictly opposite part (in this case, the strictly lower region) will + // be *assumed* to be zero during computation. However, when printed out, + // the strictly lower part may contain junk values. + bli_obj_set_uplo( BLIS_UPPER, &a ); + + // Now set the upper triangle to random values. + bli_randm( &a ); + + bli_printm( "a: randomize upper part (lower part may contain garbage)", &a, "%4.1f", "" ); + + + // + // Example 2: Initialize the upper triangle of a matrix to random values + // but also explicitly set the strictly lower triangle to zero. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + obj_t b, bl; + + // Create a matrix to work with. + dt = BLIS_DOUBLE; + m = 5; n = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &b ); + + // Set structure and uplo. + bli_obj_set_struc( BLIS_TRIANGULAR, &b ); + bli_obj_set_uplo( BLIS_UPPER, &b ); + + // Create an alias, 'bl', of the original object 'b'. Both objects will + // refer to the same underlying matrix elements, but now we will have two + // different "views" into the matrix. Aliases are simply "shallow copies" + // of the objects, meaning no additional memory allocation takes place. + // Therefore it is up to the API user (you) to make sure that you only + // free the original object (or exactly one of the aliases). + bli_obj_alias_to( &b, &bl ); + + // Digression: Each object contains a diagonal offset (even vectors), + // even if it is never needed. The diagonal offset for a newly-created + // object (ie: objects created via bli_obj_create*()) defaults to 0, + // meaning it intersects element (0,0), but it can be changed. When the + // diagonal offset delta is positive, the diagonal intersects element + // (0,delta). When the diagonal offset is negative, the diagonal + // intersects element (-delta,0). In other words, think of element (0,0) + // as the origin of a coordinate plane, with the diagonal being the + // x-axis value. + + // Set the diagonal offset of 'bl' to -1. + bli_obj_set_diag_offset( -1, &bl ); + + // Set the uplo field of 'bl' to "lower". + bli_obj_set_uplo( BLIS_LOWER, &bl ); + + // Set the upper triangle of 'b' to random values. + bli_randm( &b ); + + // Set the strictly lower triangle of 'b' to zero (by setting the lower + // triangle of 'bl' to zero). + bli_setm( &BLIS_ZERO, &bl ); + + bli_printm( "b: randomize upper part; set strictly lower part to 0.0", &b, "%4.1f", "" ); + + // You may not see the effect of setting the strictly lower part to zero, + // since those values may already be zero (instead of random junk). So + // let's set it to something you'll notice, like -1.0. + bli_setm( &BLIS_MINUS_ONE, &bl ); + + bli_printm( "b: randomize upper part; set strictly lower part to -1.0", &b, "%4.1f", "" ); + + + // + // Example 3: Copy the lower triangle of an existing object to a newly + // created (but otherwise uninitialized) object. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + obj_t c; + + // Create a matrix to work with. + dt = BLIS_DOUBLE; + m = 5; n = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &c ); + + // Reset the diagonal offset of 'bl' to 0. + bli_obj_set_diag_offset( 0, &bl ); + + // Copy the lower triangle of matrix 'b' from Example 2 to object 'c'. + // This should give us -1.0 in the strictly lower part and some non-zero + // random values along the diagonal. Note that since 'c' is starting out + // uninitialized, the strictly upper part could contain junk. + bli_copym( &bl, &c ); + + bli_printm( "c: copy lower part of b (upper part may contain garbage)", &c, "%4.1f", "" ); + + // Notice that the structure and uplo properties of 'c' were set to their + // default values, BLIS_GENERAL and BLIS_DENSE, respectively. Thus, it is + // the structure and uplo of the *source* operand that controls what gets + // copied, regardless of the structure/uplo of the destination. To + // demonstrate this further, let's see what happens when we copy 'bl' + // (which is lower triangular) to 'a' (which is upper triangular). + + bli_copym( &bl, &a ); + + // The result is that the lower part (diagonal and strictly lower part) is + // copied into 'a', but the elements in the strictly upper part of 'a' are + // unaffected. Note, however, that 'a' is still marked as upper triangular + // and so in future computations where 'a' is an input operand, the -1.0 + // values that were copied from 'bl' into the lower triangle will be + // ignored. Generally speaking, level-1m operations on triangular matrices + // ignore the "unstored" regions of input operands because they are assumed + // to be zero). + + bli_printm( "a: copy lower triangular bl to upper triangular a", &a, "%4.1f", "" ); + + + // + // Example 4: Copy the lower triangle of an existing object into the + // upper triangle of an existing object. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + obj_t d; + + // Create a matrix to work with. + dt = BLIS_DOUBLE; + m = 5; n = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &d ); + + // Let's start by setting entire destination matrix to zero. + bli_setm( &BLIS_ZERO, &d ); + + bli_printm( "d: initial value (all zeros)", &d, "%4.1f", "" ); + + // Recall that 'bl' is marked as lower triangular with a diagonal offset + // of 0. Also recall that 'bl' is an alias of 'b', which is now fully + // initialized. But let's change a few values manually so we can later + // see the full effect of the transposition. + bli_setijm( 2.0, 0.0, 2, 0, &bl ); + bli_setijm( 3.0, 0.0, 3, 0, &bl ); + bli_setijm( 4.0, 0.0, 4, 0, &bl ); + bli_setijm( 3.1, 0.0, 3, 1, &bl ); + bli_setijm( 3.2, 0.0, 3, 2, &bl ); + + bli_printm( "bl: lower triangular bl is aliased to b", &bl, "%4.1f", "" ); + + // We want to pluck out the lower triangle and transpose it into the upper + // triangle of 'd'. + bli_obj_toggle_trans( &bl ); + + // Now we copy the transpose of the lower part of 'bl' into the upper + // part of 'd'. (Again, notice that we haven't modified any properties of + // 'd'. It's the source operand that matters, not the destination!) + bli_copym( &bl, &d ); + + bli_printm( "d: transpose of lower triangular of bl copied to d", &d, "%4.1f", "" ); + + + // + // Example 5: Create a rectangular matrix (m > n) with a lower trapezoid + // containing random values, then set the strictly upper + // triangle to zeros. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + obj_t e, el; + + // Create a matrix to work with. + dt = BLIS_DOUBLE; + m = 6; n = 4; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &e ); + + // Initialize the entire matrix to -1.0 to simulate junk values. + bli_setm( &BLIS_MINUS_ONE, &e ); + + bli_printm( "e: initial value (all -1.0)", &e, "%4.1f", "" ); + + // Create an alias to work with. + bli_obj_alias_to( &e, &el ); + + // Set structure and uplo of 'el'. + bli_obj_set_struc( BLIS_TRIANGULAR, &el ); + bli_obj_set_uplo( BLIS_LOWER, &el ); + + // Digression: Notice that "triangular" structure does not require that + // the matrix be square. Rather, it simply means that either the part above + // or below the diagonal will be assumed to be zero. + + // Randomize the lower trapezoid. + bli_randm( &el ); + + bli_printm( "e: after lower trapezoid randomized", &e, "%4.1f", "" ); + + // Move the diagonal offset of 'el' to 1 and flip the uplo field to + // "upper". + bli_obj_set_diag_offset( 1, &el ); + bli_obj_set_uplo( BLIS_UPPER, &el ); + + // Set the upper triangle to zero. + bli_setm( &BLIS_ZERO, &el ); + + bli_printm( "e: after upper triangle set to zero", &e, "%4.1f", "" ); + + + // + // Example 6: Create an upper Hessenberg matrix of random values and then + // set the "unstored" values to zero. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + obj_t h, hl; + + // Create a matrix to work with. + dt = BLIS_DOUBLE; + m = 5; n = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &h ); + + // Initialize the entire matrix to -1.0 to simulate junk values. + bli_setm( &BLIS_MINUS_ONE, &h ); + + bli_printm( "h: initial value (all -1.0)", &h, "%4.1f", "" ); + + // Set the diagonal offset of 'h' to -1. + bli_obj_set_diag_offset( -1, &h ); + + // Set the structure and uplo of 'h'. + bli_obj_set_struc( BLIS_TRIANGULAR, &h ); + bli_obj_set_uplo( BLIS_UPPER, &h ); + + // Randomize the elements on and above the first subdiagonal. + bli_randm( &h ); + + bli_printm( "h: after randomizing above first subdiagonal", &h, "%4.1f", "" ); + + // Create an alias to work with. + bli_obj_alias_to( &h, &hl ); + + // Flip the uplo of 'hl' and move the diagonal down by one. + bli_obj_set_uplo( BLIS_LOWER, &hl ); + bli_obj_set_diag_offset( -2, &hl ); + + // Set the region strictly below the first subdiagonal (on or below + // the second subdiagonal) to zero. + bli_setm( &BLIS_ZERO, &hl ); + + bli_printm( "h: after setting elements below first subdiagonal to zero", &h, "%4.1f", "" ); + + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + bli_obj_free( &d ); + bli_obj_free( &e ); + bli_obj_free( &h ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- + +int main( int argc, char** argv ) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/08level2.cpp b/examples/hpx/oapi/08level2.cpp new file mode 100644 index 0000000000..659e10ddd8 --- /dev/null +++ b/examples/hpx/oapi/08level2.cpp @@ -0,0 +1,333 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + num_t dt; + dim_t m, n; + inc_t rs, cs; + + obj_t a, x, y, b; + obj_t* alpha; + obj_t* beta; + + // + // This file demonstrates level-2 operations. + // + + + // + // Example 1: Perform a general rank-1 update (ger) operation. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 4; n = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &a ); + bli_obj_create( dt, m, 1, rs, cs, &x ); + bli_obj_create( dt, 1, n, rs, cs, &y ); + + // Set alpha. + alpha = (obj_t*)(&BLIS_ONE); + + // Initialize vectors 'x' and 'y'. + bli_randv( &x ); + bli_setv( &BLIS_MINUS_ONE, &y ); + + // Initialize 'a' to 1.0. + bli_setm( &BLIS_ONE, &a ); + + bli_printm( "x: set to random values", &x, "%4.1f", "" ); + bli_printm( "y: set to -1.0", &y, "%4.1f", "" ); + bli_printm( "a: initial value", &a, "%4.1f", "" ); + + // a := a + alpha * x * y, where 'a' is general. + bli_ger( alpha, &x, &y, &a ); + + bli_printm( "a: after ger", &a, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &x ); + bli_obj_free( &y ); + + + // + // Example 2: Perform a general matrix-vector multiply (gemv) operation. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 4; n = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &a ); + bli_obj_create( dt, 1, n, rs, cs, &x ); + bli_obj_create( dt, 1, m, rs, cs, &y ); + + // Notice that we created vectors 'x' and 'y' as row vectors, even though + // we often think of them as column vectors so that the overall problem + // dimensions remain conformal. Note that this flexibility only comes + // from the fact that the operation requires those operands to be vectors. + // If we were instead looking at an operation where the operands were of + // general shape (such as with the gemm operation), then typically the + // dimensions matter, and column vectors would not be interchangeable with + // row vectors and vice versa. + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + beta = (obj_t*)(&BLIS_ONE); + + // Initialize vectors 'x' and 'y'. + bli_setv( &BLIS_ONE, &x ); + bli_setv( &BLIS_ZERO, &y ); + + // Randomize 'a'. + bli_randm( &a ); + + bli_printm( "a: randomized", &a, "%4.1f", "" ); + bli_printm( "x: set to 1.0", &x, "%4.1f", "" ); + bli_printm( "y: initial value", &y, "%4.1f", "" ); + + // y := beta * y + alpha * a * x, where 'a' is general. + bli_gemv( alpha, &a, &x, beta, &y ); + + bli_printm( "y: after gemv", &y, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &x ); + bli_obj_free( &y ); + + + // + // Example 3: Perform a symmetric rank-1 update (syr) operation. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, m, rs, cs, &a ); + bli_obj_create( dt, 1, m, rs, cs, &x ); + + // Set alpha. + alpha = (obj_t*)(&BLIS_ONE); + + // Initialize vector 'x'. + bli_randv( &x ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_setm( &BLIS_ZERO, &a ); + + // Mark matrix 'a' as symmetric and stored in the lower triangle, and + // then randomize that lower triangle. + bli_obj_set_struc( BLIS_SYMMETRIC, &a ); + bli_obj_set_uplo( BLIS_LOWER, &a ); + bli_randm( &a ); + + bli_printm( "x: set to random values", &x, "%4.1f", "" ); + bli_printm( "a: initial value (zeros in upper triangle)", &a, "%4.1f", "" ); + + // a := a + alpha * x * x^T, where 'a' is symmetric and lower-stored. + bli_syr( alpha, &x, &a ); + + bli_printm( "a: after syr", &a, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &x ); + + + // + // Example 4: Perform a symmetric matrix-vector multiply (symv) operation. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, m, rs, cs, &a ); + bli_obj_create( dt, 1, m, rs, cs, &x ); + bli_obj_create( dt, 1, m, rs, cs, &y ); + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + beta = (obj_t*)(&BLIS_ONE); + + // Initialize vectors 'x' and 'y'. + bli_setv( &BLIS_ONE, &x ); + bli_setv( &BLIS_ZERO, &y ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_setm( &BLIS_ZERO, &a ); + + // Mark matrix 'a' as symmetric and stored in the upper triangle, and + // then randomize that upper triangle. + bli_obj_set_struc( BLIS_SYMMETRIC, &a ); + bli_obj_set_uplo( BLIS_UPPER, &a ); + bli_randm( &a ); + + bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" ); + bli_printm( "x: set to 1.0", &x, "%4.1f", "" ); + bli_printm( "y: initial value", &y, "%4.1f", "" ); + + // y := beta * y + alpha * a * x, where 'a' is symmetric and upper-stored. + bli_symv( alpha, &a, &x, beta, &y ); + + bli_printm( "y: after symv", &y, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &x ); + bli_obj_free( &y ); + + + // + // Example 5: Perform a triangular matrix-vector multiply (trmv) operation. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, m, rs, cs, &a ); + bli_obj_create( dt, 1, m, rs, cs, &x ); + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + + // Initialize vector 'x'. + bli_setv( &BLIS_ONE, &x ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_setm( &BLIS_ZERO, &a ); + + // Mark matrix 'a' as triangular, stored in the lower triangle, and + // having a non-unit diagonal. Then randomize that lower triangle. + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); + bli_obj_set_uplo( BLIS_LOWER, &a ); + bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); + bli_randm( &a ); + + bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); + bli_printm( "x: initial value", &x, "%4.1f", "" ); + + // x := alpha * a * x, where 'a' is triangular and lower-stored. + bli_trmv( alpha, &a, &x ); + + bli_printm( "x: after trmv", &x, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &x ); + + + // + // Example 6: Perform a triangular solve (trsv) operation. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 5; rs = 0; cs = 0; + bli_obj_create( dt, m, m, rs, cs, &a ); + bli_obj_create( dt, 1, m, rs, cs, &b ); + bli_obj_create( dt, 1, m, rs, cs, &y ); + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + + // Initialize vector 'x'. + bli_setv( &BLIS_ONE, &b ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_setm( &BLIS_ZERO, &a ); + + // Mark matrix 'a' as triangular, stored in the lower triangle, and + // having a non-unit diagonal. Then randomize that lower triangle. + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); + bli_obj_set_uplo( BLIS_LOWER, &a ); + bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); + bli_randm( &a ); + + // Load the diagonal. By setting the diagonal to something of greater + // absolute value than the off-diagonal elements, we increase the odds + // that the matrix is not singular (singular matrices have no inverse). + bli_shiftd( &BLIS_TWO, &a ); + + bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); + bli_printm( "b: initial value", &b, "%4.1f", "" ); + + // solve a * x = alpha * b, where 'a' is triangular and lower-stored, and + // overwrite b with the solution vector x. + bli_trsv( alpha, &a, &b ); + + bli_printm( "b: after trsv", &b, "%4.1f", "" ); + + // We can confirm the solution by comparing the product of a and x to the + // original value of b. + bli_copyv( &b, &y ); + bli_trmv( alpha, &a, &y ); + + bli_printm( "y: should equal initial value of b", &y, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &b ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- +// +int main( int argc, char** argv ) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/09level3.cpp b/examples/hpx/oapi/09level3.cpp new file mode 100644 index 0000000000..7aad1a13a8 --- /dev/null +++ b/examples/hpx/oapi/09level3.cpp @@ -0,0 +1,338 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + num_t dt; + dim_t m, n, k; + inc_t rs, cs; + side_t side; + + obj_t a, b, c; + obj_t* alpha; + obj_t* beta; + + + // + // This file demonstrates level-3 operations. + // + + + // + // Example 1: Perform a general matrix-matrix multiply (gemm) operation. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create some matrix operands to work with. + dt = BLIS_DOUBLE; + m = 4; n = 5; k = 3; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &c ); + bli_obj_create( dt, m, k, rs, cs, &a ); + bli_obj_create( dt, k, n, rs, cs, &b ); + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + beta = (obj_t*)(&BLIS_ONE); + + // Initialize the matrix operands. + bli_randm( &a ); + bli_setm( &BLIS_ONE, &b ); + bli_setm( &BLIS_ZERO, &c ); + + bli_printm( "a: randomized", &a, "%4.1f", "" ); + bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); + bli_printm( "c: initial value", &c, "%4.1f", "" ); + + // c := beta * c + alpha * a * b, where 'a', 'b', and 'c' are general. + bli_gemm( alpha, &a, &b, beta, &c ); + + bli_printm( "c: after gemm", &c, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + + + // + // Example 1b: Perform a general matrix-matrix multiply (gemm) operation + // with the left input operand (matrix A) transposed. + // + + printf( "\n#\n# -- Example 1b --\n#\n\n" ); + + // Create some matrix operands to work with. + dt = BLIS_DOUBLE; + m = 4; n = 5; k = 3; rs = 0; cs = 0; + bli_obj_create( dt, m, n, rs, cs, &c ); + bli_obj_create( dt, k, m, rs, cs, &a ); + bli_obj_create( dt, k, n, rs, cs, &b ); + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + beta = (obj_t*)(&BLIS_ONE); + + // Initialize the matrix operands. + bli_randm( &a ); + bli_setm( &BLIS_ONE, &b ); + bli_setm( &BLIS_ZERO, &c ); + + // Set the transpose bit in 'a'. + bli_obj_toggle_trans( &a ); + + bli_printm( "a: randomized", &a, "%4.1f", "" ); + bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); + bli_printm( "c: initial value", &c, "%4.1f", "" ); + + // c := beta * c + alpha * a^T * b, where 'a', 'b', and 'c' are general. + bli_gemm( alpha, &a, &b, beta, &c ); + + bli_printm( "c: after gemm", &c, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + + + // + // Example 2: Perform a symmetric rank-k update (syrk) operation. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 5; k = 3; rs = 0; cs = 0; + bli_obj_create( dt, m, m, rs, cs, &c ); + bli_obj_create( dt, m, k, rs, cs, &a ); + + // Set alpha. + alpha = (obj_t*)(&BLIS_ONE); + + // Initialize matrix operands. + bli_setm( &BLIS_ZERO, &c ); + bli_randm( &a ); + + // Mark matrix 'c' as symmetric and stored in the lower triangle, and + // then randomize that lower triangle. + bli_obj_set_struc( BLIS_SYMMETRIC, &c ); + bli_obj_set_uplo( BLIS_LOWER, &c ); + bli_randm( &c ); + + bli_printm( "a: set to random values", &a, "%4.1f", "" ); + bli_printm( "c: initial value (zeros in upper triangle)", &c, "%4.1f", "" ); + + // c := c + alpha * a * a^T, where 'c' is symmetric and lower-stored. + bli_syrk( alpha, &a, beta, &c ); + + bli_printm( "c: after syrk", &c, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &c ); + bli_obj_free( &a ); + + + // + // Example 3: Perform a symmetric matrix-matrix multiply (symm) operation. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 5; n = 6; rs = 0; cs = 0; + bli_obj_create( dt, m, m, rs, cs, &a ); + bli_obj_create( dt, m, n, rs, cs, &b ); + bli_obj_create( dt, m, n, rs, cs, &c ); + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + beta = (obj_t*)(&BLIS_ONE); + + // Set the side operand. + side = BLIS_LEFT; + + // Initialize matrices 'b' and 'c'. + bli_setm( &BLIS_ONE, &b ); + bli_setm( &BLIS_ZERO, &c ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_setm( &BLIS_ZERO, &a ); + + // Mark matrix 'a' as symmetric and stored in the upper triangle, and + // then randomize that upper triangle. + bli_obj_set_struc( BLIS_SYMMETRIC, &a ); + bli_obj_set_uplo( BLIS_UPPER, &a ); + bli_randm( &a ); + + bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" ); + bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); + bli_printm( "c: initial value", &c, "%4.1f", "" ); + + // c := beta * c + alpha * a * b, where 'a' is symmetric and upper-stored. + // Note that the first 'side' operand indicates the side from which matrix + // 'a' is multiplied into 'b'. + bli_symm( side, alpha, &a, &b, beta, &c ); + + bli_printm( "c: after symm", &c, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + + + // + // Example 4: Perform a triangular matrix-matrix multiply (trmm) operation. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 5; n = 4; rs = 0; cs = 0; + bli_obj_create( dt, m, m, rs, cs, &a ); + bli_obj_create( dt, m, n, rs, cs, &b ); + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + + // Set the side operand. + side = BLIS_LEFT; + + // Initialize matrix 'b'. + bli_setm( &BLIS_ONE, &b ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_setm( &BLIS_ZERO, &a ); + + // Mark matrix 'a' as triangular, stored in the lower triangle, and + // having a non-unit diagonal. Then randomize that lower triangle. + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); + bli_obj_set_uplo( BLIS_LOWER, &a ); + bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); + bli_randm( &a ); + + bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); + bli_printm( "b: initial value", &b, "%4.1f", "" ); + + // b := alpha * a * b, where 'a' is triangular and lower-stored. + bli_trmm( side, alpha, &a, &b ); + + bli_printm( "x: after trmm", &b, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &b ); + + + // + // Example 5: Perform a triangular solve with multiple right-hand sides + // (trsm) operation. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + dt = BLIS_DOUBLE; + m = 5; n = 4; rs = 0; cs = 0; + bli_obj_create( dt, m, m, rs, cs, &a ); + bli_obj_create( dt, m, n, rs, cs, &b ); + bli_obj_create( dt, m, n, rs, cs, &c ); + + // Set the scalars to use. + alpha = (obj_t*)(&BLIS_ONE); + + // Set the side operand. + side = BLIS_LEFT; + + // Initialize matrix 'b'. + bli_setm( &BLIS_ONE, &b ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_setm( &BLIS_ZERO, &a ); + + // Mark matrix 'a' as triangular, stored in the lower triangle, and + // having a non-unit diagonal. Then randomize that lower triangle. + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); + bli_obj_set_uplo( BLIS_LOWER, &a ); + bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); + bli_randm( &a ); + + // Load the diagonal. By setting the diagonal to something of greater + // absolute value than the off-diagonal elements, we increase the odds + // that the matrix is not singular (singular matrices have no inverse). + bli_shiftd( &BLIS_TWO, &a ); + + bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); + bli_printm( "b: initial value", &b, "%4.1f", "" ); + + // solve a * x = alpha * b, where 'a' is triangular and lower-stored, and + // overwrite b with the solution matrix x. + bli_trsm( side, alpha, &a, &b ); + + bli_printm( "b: after trsm", &b, "%4.1f", "" ); + + // We can confirm the solution by comparing the product of a and x to the + // original value of b. + bli_copym( &b, &c ); + bli_trmm( side, alpha, &a, &c ); + + bli_printm( "c: should equal initial value of b", &c, "%4.1f", "" ); + + // Free the objects. + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- + +int main( int argc, char** argv ) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/oapi/Makefile b/examples/hpx/oapi/Makefile new file mode 100644 index 0000000000..5607d6d2db --- /dev/null +++ b/examples/hpx/oapi/Makefile @@ -0,0 +1,16 @@ +CXX=g++ +all: + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=00obj_basic -o 00obj_basic 00obj_basic.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=01obj_attach -o 01obj_attach 01obj_attach.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=02obj_ij -o 02obj_ij 02obj_ij.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=03obj_view -o 03obj_view 03obj_view.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=04level0 -o 04level0 04level0.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=05level1v -o 05level1v 05level1v.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=06level1m -o 06level1m 06level1m.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=07level1m_diag -o 07level1m_diag 07level1m_diag.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=08level2 -o 08level2 08level2.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=09level3 -o09level3 09level3.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + +clean: + rm 00obj_basic 02obj_ij 04level0 06level1m 08level2 01obj_attach 03obj_view 05level1v 07level1m_diag 09level3 + From 4071f9e062144d3012778fc9e7e97ec88eb29833 Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 19:39:29 -0500 Subject: [PATCH 08/21] initial import --- examples/hpx/oapi/Makefile | 3 +- examples/hpx/tapi/00level1v.cpp | 191 +++++++++++++++ examples/hpx/tapi/01level1m.cpp | 224 +++++++++++++++++ examples/hpx/tapi/02level1m_diag.cpp | 251 +++++++++++++++++++ examples/hpx/tapi/03level2.cpp | 321 ++++++++++++++++++++++++ examples/hpx/tapi/04level3.cpp | 349 +++++++++++++++++++++++++++ examples/hpx/tapi/05util.cpp | 287 ++++++++++++++++++++++ examples/hpx/tapi/Makefile | 12 + 8 files changed, 1637 insertions(+), 1 deletion(-) create mode 100644 examples/hpx/tapi/00level1v.cpp create mode 100644 examples/hpx/tapi/01level1m.cpp create mode 100644 examples/hpx/tapi/02level1m_diag.cpp create mode 100644 examples/hpx/tapi/03level2.cpp create mode 100644 examples/hpx/tapi/04level3.cpp create mode 100644 examples/hpx/tapi/05util.cpp create mode 100644 examples/hpx/tapi/Makefile diff --git a/examples/hpx/oapi/Makefile b/examples/hpx/oapi/Makefile index 5607d6d2db..a8c587b066 100644 --- a/examples/hpx/oapi/Makefile +++ b/examples/hpx/oapi/Makefile @@ -1,4 +1,5 @@ -CXX=g++ +-include $(SHARE_PATH)/common.mk + all: $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=00obj_basic -o 00obj_basic 00obj_basic.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=01obj_attach -o 01obj_attach 01obj_attach.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis diff --git a/examples/hpx/tapi/00level1v.cpp b/examples/hpx/tapi/00level1v.cpp new file mode 100644 index 0000000000..83289c3274 --- /dev/null +++ b/examples/hpx/tapi/00level1v.cpp @@ -0,0 +1,191 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + double* x; + double* y; + double* z; + double* w; + double* a; + double alpha, beta, gamma; + dim_t m, n; + inc_t rs, cs; + + // Initialize some basic constants. + double zero = 0.0; + double one = 1.0; + double minus_one = -1.0; + + + // + // This file demonstrates working with vectors and the level-1v + // operations. + // + + + // + // Example 1: Create vectors and then broadcast (copy) scalar + // values to all elements. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create a few vectors to work with. We make them all of the same length + // so that we can perform operations between them. + // NOTE: We've chosen to use row vectors here (1x4) instead of column + // vectors (4x1) to allow for easier reading of standard output (less + // scrolling). + m = 1; n = 4; rs = n; cs = 1; + x = static_cast(malloc( m * n * sizeof( double ) )); + y = static_cast(malloc( m * n * sizeof( double ) )); + z = static_cast(malloc( m * n * sizeof( double ) )); + w = static_cast(malloc( m * n * sizeof( double ) )); + a = static_cast(malloc( m * n * sizeof( double ) )); + + // Let's initialize some scalars. + alpha = 2.0; + beta = 0.2; + gamma = 3.0; + + printf( "alpha:\n%4.1f\n\n", alpha ); + printf( "beta:\n%4.1f\n\n", beta ); + printf( "gamma:\n%4.1f\n\n", gamma ); + printf( "\n" ); + + bli_dsetv( BLIS_NO_CONJUGATE, n, &one, x, 1 ); + bli_dsetv( BLIS_NO_CONJUGATE, n, &alpha, y, 1 ); + bli_dsetv( BLIS_NO_CONJUGATE, n, &zero, z, 1 ); + + // Note that we can use printv or printm to print vectors since vectors + // are also matrices. We choose to use printm because it honors the + // orientation of the vector (row or column) when printing, whereas + // printv always prints vectors as column vectors regardless of their + // they are 1 x n or n x 1. + bli_dprintm( "x := 1.0", m, n, x, rs, cs, "%4.1f", "" ); + bli_dprintm( "y := alpha", m, n, y, rs, cs, "%4.1f", "" ); + bli_dprintm( "z := 0.0", m, n, z, rs, cs, "%4.1f", "" ); + + + // + // Example 2: Randomize a vector. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Set a vector to random values. + bli_drandv( n, w, 1 ); + + bli_dprintm( "x := randv()", m, n, w, rs, cs, "%4.1f", "" ); + + + // + // Example 3: Perform various element-wise operations on vectors. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Copy a vector. + bli_dcopyv( BLIS_NO_CONJUGATE, n, w, 1, a, 1 ); + bli_dprintm( "a := w", m, n, a, rs, cs, "%4.1f", "" ); + + // Add and subtract vectors. + bli_daddv( BLIS_NO_CONJUGATE, n, y, 1, a, 1 ); + bli_dprintm( "a := a + y", m, n, a, rs, cs, "%4.1f", "" ); + + bli_dsubv( BLIS_NO_CONJUGATE, n, w, 1, a, 1 ); + bli_dprintm( "a := a + w", m, n, a, rs, cs, "%4.1f", "" ); + + // Scale a vector (destructive). + bli_dscalv( BLIS_NO_CONJUGATE, n, &beta, a, 1 ); + bli_dprintm( "a := beta * a", m, n, a, rs, cs, "%4.1f", "" ); + + // Scale a vector (non-destructive). + bli_dscal2v( BLIS_NO_CONJUGATE, n, &gamma, a, 1, z, 1 ); + bli_dprintm( "z := gamma * a", m, n, z, rs, cs, "%4.1f", "" ); + + // Scale and accumulate between vectors. + bli_daxpyv( BLIS_NO_CONJUGATE, n, &alpha, w, 1, x, 1 ); + bli_dprintm( "x := x + alpha * w", m, n, x, rs, cs, "%4.1f", "" ); + + bli_dxpbyv( BLIS_NO_CONJUGATE, n, w, 1, &minus_one, x, 1 ); + bli_dprintm( "x := -1.0 * x + w", m, n, x, rs, cs, "%4.1f", "" ); + + // Invert a vector element-wise. + bli_dinvertv( n, y, 1 ); + bli_dprintm( "y := 1 / y", m, n, y, rs, cs, "%4.1f", "" ); + + // Swap two vectors. + bli_dswapv( n, x, 1, y, 1 ); + bli_dprintm( "x (after swapping with y)", m, n, x, rs, cs, "%4.1f", "" ); + bli_dprintm( "y (after swapping with x)", m, n, y, rs, cs, "%4.1f", "" ); + + + // + // Example 4: Perform contraction-like operations on vectors. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Perform a dot product. + bli_ddotv( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, n, a, 1, z, 1, &gamma ); + printf( "gamma := a * z (dot product):\n%5.2f\n\n", gamma ); + + // Perform an extended dot product. + bli_ddotxv( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, n, &alpha, a, 1, z, 1, &one, &gamma ); + printf( "gamma := 1.0 * gamma + alpha * a * z (accumulate scaled dot product):\n%5.2f\n\n", gamma ); + + + // Free the memory obtained via malloc(). + free( x ); + free( y ); + free( z ); + free( w ); + free( a ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- + +int main(int argc, char** argv) { + return hpx::init(argc, argv); +} + diff --git a/examples/hpx/tapi/01level1m.cpp b/examples/hpx/tapi/01level1m.cpp new file mode 100644 index 0000000000..8337a12edc --- /dev/null +++ b/examples/hpx/tapi/01level1m.cpp @@ -0,0 +1,224 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + double* a; + double* b; + double* c; + double* d; + double* e; + double* f; + dcomplex* g; + dcomplex* h; + double alpha, beta, gamma; + dim_t m, n; + inc_t rs, cs; + + // Initialize some basic constants. + double zero = 0.0; + double one = 1.0; + double minus_one = -1.0; + dcomplex minus_one_z = {-1.0, 0.0}; + + + // + // This file demonstrates working with matrices and the level-1m + // operations. + // + + + // + // Example 1: Create matrices and then broadcast (copy) scalar + // values to all elements. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create a few matrices to work with. We make them all of the same + // dimensions so that we can perform operations between them. + m = 2; n = 3; rs = 1; cs = m; + a = static_cast(malloc( m * n * sizeof( double ) )); + b = static_cast(malloc( m * n * sizeof( double ) )); + c = static_cast(malloc( m * n * sizeof( double ) )); + d = static_cast(malloc( m * n * sizeof( double ) )); + e = static_cast(malloc( m * n * sizeof( double ) )); + + // Let's initialize some scalars. + alpha = 2.0; + beta = 0.2; + gamma = 3.0; + + printf( "alpha:\n%4.1f\n\n", alpha ); + printf( "beta:\n%4.1f\n\n", beta ); + printf( "gamma:\n%4.1f\n\n", gamma ); + printf( "\n" ); + + // Matrices, like vectors, can set by "broadcasting" a constant to every + // element. Note that the second argument (0) is the diagonal offset. + // The diagonal offset is only used when the uplo value is something other + // than BLIS_DENSE (e.g. BLIS_LOWER or BLIS_UPPER). + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &one, a, rs, cs ); + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &alpha, b, rs, cs ); + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &zero, c, rs, cs ); + + bli_dprintm( "a := 1.0", m, n, a, rs, cs, "%4.1f", "" ); + bli_dprintm( "b := alpha", m, n, b, rs, cs, "%4.1f", "" ); + bli_dprintm( "c := 0.0", m, n, c, rs, cs, "%4.1f", "" ); + + + // + // Example 2: Randomize a matrix object. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + bli_drandm( 0, BLIS_DENSE, m, n, e, rs, cs ); + + bli_dprintm( "e (randomized):", m, n, e, rs, cs, "%4.1f", "" ); + + + // + // Example 3: Perform element-wise operations on matrices. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Copy a matrix. + bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, + m, n, e, rs, cs, d, rs, cs ); + bli_dprintm( "d := e", m, n, d, rs, cs, "%4.1f", "" ); + + // Add and subtract vectors. + bli_daddm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, + m, n, a, rs, cs, d, rs, cs ); + bli_dprintm( "d := d + a", m, n, d, rs, cs, "%4.1f", "" ); + + bli_dsubm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, + m, n, a, rs, cs, e, rs, cs ); + bli_dprintm( "e := e - a", m, n, e, rs, cs, "%4.1f", "" ); + + // Scale a matrix (destructive). + bli_dscalm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &alpha, e, rs, cs ); + bli_dprintm( "e := alpha * e", m, n, e, rs, cs, "%4.1f", "" ); + + // Scale a matrix (non-destructive). + bli_dscal2m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, + m, n, &beta, e, rs, cs, c, rs, cs ); + bli_dprintm( "c := beta * e", m, n, c, rs, cs, "%4.1f", "" ); + + // Scale and accumulate between matrices. + bli_daxpym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, + m, n, &alpha, a, rs, cs, c, rs, cs ); + bli_dprintm( "c := alpha * a", m, n, c, rs, cs, "%4.1f", "" ); + + + // + // Example 4: Copy and transpose a matrix. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create an n-by-m matrix into which we can copy-transpose an m-by-n + // matrix. + f = static_cast(malloc( n * m * sizeof( double ) )); + dim_t rsf = 1, csf = n; + + // Initialize all of 'f' to -1.0 to simulate junk values. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + n, m, &minus_one, f, rsf, csf ); + + bli_dprintm( "e:", m, n, e, rs, cs, "%4.1f", "" ); + bli_dprintm( "f (initial value):", n, m, f, rsf, csf, "%4.1f", "" ); + + + // Copy 'e' to 'f', transposing 'e' in the process. Notice that we haven't + // modified any properties of 'd'. It's the source operand that matters + // when marking an operand for transposition, not the destination. + bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_TRANSPOSE, + n, m, e, rs, cs, f, rsf, csf ); + + bli_dprintm( "f (copied value):", n, m, f, rsf, csf, "%4.1f", "" ); + + + // + // Example 5: Copy and Hermitian-transpose a matrix. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + g = static_cast(malloc( m * n * sizeof(dcomplex) )); + h = static_cast(malloc( n * m * sizeof(dcomplex) )); + + bli_zrandm( 0, BLIS_DENSE, m, n, g, rs, cs ); + + bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + n, m, &minus_one_z, h, rsf, csf ); + + bli_zprintm( "g:", m, n, g, rs, cs, "%4.1f", "" ); + bli_zprintm( "h (initial value):", n, m, h, rsf, csf, "%4.1f", "" ); + + bli_zcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_CONJ_TRANSPOSE, + n, m, g, rs, cs, h, rsf, csf ); + + bli_zprintm( "h (copied value):", n, m, h, rsf, csf, "%4.1f", "" ); + + + // Free the memory obtained via malloc(). + free( a ); + free( b ); + free( c ); + free( d ); + free( e ); + free( f ); + free( g ); + free( h ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- +int main(int argc, char ** argv) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/tapi/02level1m_diag.cpp b/examples/hpx/tapi/02level1m_diag.cpp new file mode 100644 index 0000000000..482d86ea65 --- /dev/null +++ b/examples/hpx/tapi/02level1m_diag.cpp @@ -0,0 +1,251 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + double* a; + double* b; + double* c; + double* d; + double* e; + double* h; + dim_t m, n; + inc_t rs, cs; + + // Initialize some basic constants. + double zero = 0.0; + double minus_one = -1.0; + + + // + // This file demonstrates level-1m operations on structured matrices. + // + + + // + // Example 1: Initialize the upper triangle of a matrix to random values. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create a matrix to work with. + m = 5; n = 5; rs = 1; cs = m; + a = static_cast(malloc( m * n * sizeof( double ) )); + + // Set the upper triangle to random values. + bli_drandm( 0, BLIS_UPPER, m, n, a, rs, cs ); + + bli_dprintm( "a: randomize upper part (lower part may contain garbage)", + m, n, a, rs, cs, "%4.1f", "" ); + + + // + // Example 2: Initialize the upper triangle of a matrix to random values + // but also explicitly set the strictly lower triangle to zero. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Create a matrix to work with. + m = 5; n = 5; rs = 1; cs = m; + b = static_cast(malloc( m * n * sizeof( double ) )); + + // Set the upper triangle to random values. + bli_drandm( 0, BLIS_UPPER, m, n, b, rs, cs ); + + // Set the strictly lower triangle of 'b' to zero (by setting the lower + // triangle of 'bl' to zero). + bli_dsetm( BLIS_NO_CONJUGATE, -1, BLIS_NONUNIT_DIAG, BLIS_LOWER, + m, n, &zero, b, rs, cs ); + + bli_dprintm( "b: randomize upper part; set strictly lower part to 0.0)", + m, n, b, rs, cs, "%4.1f", "" ); + + // You may not see the effect of setting the strictly lower part to zero, + // since those values may already be zero (instead of random junk). So + // let's set it to something you'll notice, like -1.0. + bli_dsetm( BLIS_NO_CONJUGATE, -1, BLIS_NONUNIT_DIAG, BLIS_LOWER, + m, n, &minus_one, b, rs, cs ); + + bli_dprintm( "b: randomize upper part; set strictly lower part to -1.0)", + m, n, b, rs, cs, "%4.1f", "" ); + + + // + // Example 3: Copy the lower triangle of an existing matrix to a newly + // created (but otherwise uninitialized) matrix. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Create a matrix to work with. + m = 5; n = 5; rs = 1; cs = m; + c = static_cast(malloc( m * n * sizeof( double ) )); + + bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_NO_TRANSPOSE, + m, n, b, rs, cs, c, rs, cs ); + + bli_dprintm( "c: copy lower part of b (upper part may contain garbage)", + m, n, c, rs, cs, "%4.1f", "" ); + + bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_NO_TRANSPOSE, + m, n, b, rs, cs, a, rs, cs ); + + bli_dprintm( "a: copy lower triangle of b to upper triangular a", + m, n, a, rs, cs, "%4.1f", "" ); + + + // + // Example 4: Copy the lower triangle of an existing object into the + // upper triangle of an existing object. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create a matrix to work with. + m = 5; n = 5; rs = 1; cs = m; + d = static_cast(malloc( m * n * sizeof( double ) )); + + // Let's start by setting entire destination matrix to zero. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &zero, d, rs, cs ); + + bli_dprintm( "d: initial value (all zeros)", + m, n, d, rs, cs, "%4.1f", "" ); + + // Let's change a few values of b manually so we can later see the full + // effect of the transposition. + bli_dsetijm( 2.0, 0.0, 2, 0, b, rs, cs ); + bli_dsetijm( 3.0, 0.0, 3, 0, b, rs, cs ); + bli_dsetijm( 4.0, 0.0, 4, 0, b, rs, cs ); + bli_dsetijm( 3.1, 0.0, 2, 1, b, rs, cs ); + bli_dsetijm( 3.2, 0.0, 3, 2, b, rs, cs ); + + bli_dprintm( "b:", + m, n, b, rs, cs, "%4.1f", "" ); + + bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_TRANSPOSE, + m, n, b, rs, cs, d, rs, cs ); + + bli_dprintm( "d: transpose of lower triangle of b copied to d", + m, n, d, rs, cs, "%4.1f", "" ); + + + // + // Example 5: Create a rectangular matrix (m > n) with a lower trapezoid + // containing random values, then set the strictly upper + // triangle to zeros. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // Create a matrix to work with. + m = 6; n = 4; rs = 1; cs = m; + e = static_cast(malloc( m * n * sizeof( double ) )); + + // Initialize the entire matrix to -1.0 to simulate junk values. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &minus_one, e, rs, cs ); + + bli_dprintm( "e: initial value (all -1.0)", + m, n, e, rs, cs, "%4.1f", "" ); + + // Randomize the lower trapezoid. + bli_drandm( 0, BLIS_LOWER, m, n, e, rs, cs ); + + bli_dprintm( "e: after lower trapezoid randomized", + m, n, e, rs, cs, "%4.1f", "" ); + + // Set the upper triangle to zero. + bli_dsetm( BLIS_NO_CONJUGATE, 1, BLIS_NONUNIT_DIAG, BLIS_UPPER, + m, n, &zero, e, rs, cs ); + + bli_dprintm( "e: after upper triangle set to zero", + m, n, e, rs, cs, "%4.1f", "" ); + + + // + // Example 6: Create an upper Hessenberg matrix of random values and then + // set the "unstored" values to zero. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + // Create a matrix to work with. + m = 5; n = 5; rs = 1; cs = m; + h = static_cast(malloc( m * n * sizeof( double ) )); + + // Initialize the entire matrix to -1.0 to simulate junk values. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &minus_one, h, rs, cs ); + + bli_dprintm( "h: initial value (all -1.0)", + m, n, h, rs, cs, "%4.1f", "" ); + + // Randomize the elements on and above the first subdiagonal. + bli_drandm( -1, BLIS_UPPER, m, n, h, rs, cs ); + + bli_dprintm( "h: after randomizing above first subdiagonal", + m, n, h, rs, cs, "%4.1f", "" ); + + // Set the region strictly below the first subdiagonal (on or below + // the second subdiagonal) to zero. + bli_dsetm( BLIS_NO_CONJUGATE, -2, BLIS_NONUNIT_DIAG, BLIS_LOWER, + m, n, &zero, h, rs, cs ); + + bli_dprintm( "h: after setting elements below first subdiagonal to zero", + m, n, h, rs, cs, "%4.1f", "" ); + + + // Free the memory obtained via malloc(). + free( a ); + free( b ); + free( c ); + free( d ); + free( e ); + free( h ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- +int main(int argc, char ** argv) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/tapi/03level2.cpp b/examples/hpx/tapi/03level2.cpp new file mode 100644 index 0000000000..bf20f5376b --- /dev/null +++ b/examples/hpx/tapi/03level2.cpp @@ -0,0 +1,321 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + double* a; + double* x; + double* y; + double* b; + double alpha, beta; + dim_t m, n; + inc_t rs, cs; + + // Initialize some basic constants. + double zero = 0.0; + double one = 1.0; + double two = 2.0; + double minus_one = -1.0; + + + // + // This file demonstrates level-2 operations. + // + + + // + // Example 1: Perform a general rank-1 update (ger) operation. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 4; n = 5; rs = 1; cs = m; + a = static_cast(malloc( m * n * sizeof( double ) )); + x = static_cast(malloc( m * 1 * sizeof( double ) )); + y = static_cast(malloc( 1 * n * sizeof( double ) )); + + // Let's initialize some scalars. + alpha = 1.0; + + // Initialize vectors 'x' and 'y'. + bli_drandv( m, x, 1 ); + bli_dsetv( BLIS_NO_CONJUGATE, n, &minus_one, y, 1 ); + + // Initialize 'a' to 1.0. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &one, a, rs, cs ); + + bli_dprintm( "x: set to random values", m, 1, x, 1, m, "%4.1f", "" ); + bli_dprintm( "y: set to -1.0", 1, n, y, n, 1, "%4.1f", "" ); + bli_dprintm( "a: intial value", m, n, a, rs, cs, "%4.1f", "" ); + + // a := a + alpha * x * y, where 'a' is general. + bli_dger( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, + m, n, &alpha, x, 1, y, 1, a, rs, cs ); + + bli_dprintm( "a: after ger", m, n, a, rs, cs, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( x ); + free( y ); + + + // + // Example 2: Perform a general matrix-vector multiply (gemv) operation. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 4; n = 5; rs = 1; cs = m; + a = static_cast(malloc( m * n * sizeof( double ) )); + x = static_cast(malloc( 1 * n * sizeof( double ) )); + y = static_cast(malloc( 1 * m * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + beta = 1.0; + + // Initialize vectors 'x' and 'y'. + bli_dsetv( BLIS_NO_CONJUGATE, n, &one, x, 1 ); + bli_dsetv( BLIS_NO_CONJUGATE, m, &zero, y, 1 ); + + // Randomize 'a'. + bli_drandm( 0, BLIS_DENSE, m, n, a, rs, cs ); + + bli_dprintm( "a: randomized", m, n, a, rs, cs, "%4.1f", "" ); + bli_dprintm( "x: set to 1.0", 1, n, x, n, 1, "%4.1f", "" ); + bli_dprintm( "y: intial value", 1, m, y, m, 1, "%4.1f", "" ); + + // y := beta * y + alpha * a * x, where 'a' is general. + bli_dgemv( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, + m, n, &alpha, a, rs, cs, x, 1, &beta, y, 1 ); + + bli_dprintm( "y: after gemv", 1, m, y, m, 1, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( x ); + free( y ); + + + // + // Example 3: Perform a symmetric rank-1 update (syr) operation. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 5; rs = 1; cs = 5; + a = static_cast(malloc( m * m * sizeof( double ) )); + x = static_cast(malloc( 1 * m * sizeof( double ) )); + + // Set alpha. + alpha = 1.0; + + // Initialize vector 'x'. + bli_drandv( m, x, 1 ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &zero, a, rs, cs ); + + // Randomize the lower triangle of 'a'. + bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs ); + + bli_dprintm( "x: set to random values", 1, m, x, m, 1, "%4.1f", "" ); + bli_dprintm( "a: initial value (zeros in upper triangle)", m, m, a, 1, m, "%4.1f", "" ); + + // a := a + alpha * x * x^T, where 'a' is symmetric and lower-stored. + bli_dsyr( BLIS_LOWER, BLIS_NO_CONJUGATE, m, &alpha, x, 1, a, rs, cs ); + + bli_dprintm( "a: after syr", m, m, a, 1, m, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( x ); + + + // + // Example 4: Perform a symmetric matrix-vector multiply (symv) operation. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 5;; rs = 1; cs = m; + a = static_cast(malloc( m * m * sizeof( double ) )); + x = static_cast(malloc( 1 * m * sizeof( double ) )); + y = static_cast(malloc( 1 * m * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + beta = 1.0; + + // Initialize vectors 'x' and 'y'. + bli_dsetv( BLIS_NO_CONJUGATE, m, &one, x, 1 ); + bli_dsetv( BLIS_NO_CONJUGATE, m, &zero, y, 1 ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &zero, a, rs, cs ); + + // Randomize 'a'. + bli_drandm( 0, BLIS_UPPER, m, m, a, rs, cs ); + + bli_dprintm( "a: randomized (zeros in lower triangle)", m, m, a, rs, cs, "%4.1f", "" ); + bli_dprintm( "x: set to 1.0", 1, m, x, m, 1, "%4.1f", "" ); + bli_dprintm( "y: intial value", 1, m, y, m, 1, "%4.1f", "" ); + + // y := beta * y + alpha * a * x, where 'a' is symmetric and upper-stored. + bli_dsymv( BLIS_UPPER, (conj_t)BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, + m, &alpha, a, rs, cs, x, 1, &beta, y, 1 ); + + bli_dprintm( "y: after symv", 1, m, y, m, 1, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( x ); + free( y ); + + + // + // Example 5: Perform a triangular matrix-vector multiply (trmv) operation. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 5;; rs = 1; cs = m; + a = static_cast(malloc( m * m * sizeof( double ) )); + x = static_cast(malloc( 1 * m * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + + // Initialize vector 'x'. + bli_dsetv( BLIS_NO_CONJUGATE, m, &one, x, 1 ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &zero, a, rs, cs ); + + // Randomize 'a'. + bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs ); + + bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rs, cs, "%4.1f", "" ); + bli_dprintm( "x: intial value", 1, m, x, m, 1, "%4.1f", "" ); + + // x := alpha * a * x, where 'a' is triangular and lower-stored. + bli_dtrmv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG, + m, &alpha, a, rs, cs, x, 1 ); + + bli_dprintm( "x: after trmv", 1, m, x, m, 1, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( x ); + + + // + // Example 6: Perform a triangular solve (trsv) operation. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 5;; rs = 1; cs = m; + a = static_cast(malloc( m * m * sizeof( double ) )); + b = static_cast(malloc( 1 * m * sizeof( double ) )); + y = static_cast(malloc( 1 * m * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + + // Initialize vector 'x'. + bli_dsetv( BLIS_NO_CONJUGATE, m, &one, b, 1 ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &zero, a, rs, cs ); + + // Randomize 'a'. + bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs ); + + // Load the diagonal. By setting the diagonal to something of greater + // absolute value than the off-diagonal elements, we increase the odds + // that the matrix is not singular (singular matrices have no inverse). + bli_dshiftd( 0, m, m, &two, a, rs, cs ); + + bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rs, cs, "%4.1f", "" ); + bli_dprintm( "b: intial value", 1, m, b, m, 1, "%4.1f", "" ); + + // x := alpha * a * x, where 'a' is triangular and lower-stored. + bli_dtrsv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG, + m, &alpha, a, rs, cs, x, 1 ); + + bli_dprintm( "b: after trsv", 1, m, b, m, 1, "%4.1f", "" ); + + // We can confirm the solution by comparing the product of a and x to the + // original value of b. + bli_dcopyv( (conj_t)BLIS_NO_TRANSPOSE, m, b, 1, y, 1 ); + bli_dtrmv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG, + m, &alpha, a, rs, cs, y, 1 ); + + bli_dprintm( "y: should equal initial value of b", 1, m, y, m, 1, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( b ); + free( y ); + + + return 0; +} + +int main(int argc, char ** argv) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/tapi/04level3.cpp b/examples/hpx/tapi/04level3.cpp new file mode 100644 index 0000000000..13fd970cb1 --- /dev/null +++ b/examples/hpx/tapi/04level3.cpp @@ -0,0 +1,349 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + dim_t m, n, k; + inc_t rsa, csa; + inc_t rsb, csb; + inc_t rsc, csc; + + double* a; + double* b; + double* c; + double alpha, beta; + + // Initialize some basic constants. + double zero = 0.0; + double one = 1.0; + double two = 2.0; + + + // + // This file demonstrates level-3 operations. + // + + + // + // Example 1: Perform a general matrix-matrix multiply (gemm) operation. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 4; n = 5; k = 3; + rsc = 1; csc = m; + rsa = 1; csa = m; + rsb = 1; csb = k; + c = static_cast(malloc( m * n * sizeof( double ) )); + a = static_cast(malloc( m * k * sizeof( double ) )); + b = static_cast(malloc( k * n * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + beta = 1.0; + + // Initialize the matrix operands. + bli_drandm( 0, BLIS_DENSE, m, k, a, rsa, csa ); + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + k, n, &one, b, rsb, csb ); + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &zero, c, rsc, csc ); + + bli_dprintm( "a: randomized", m, k, a, rsa, csa, "%4.1f", "" ); + bli_dprintm( "b: set to 1.0", k, n, b, rsb, csb, "%4.1f", "" ); + bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" ); + + // c := beta * c + alpha * a * b, where 'a', 'b', and 'c' are general. + bli_dgemm( BLIS_NO_TRANSPOSE, BLIS_NO_TRANSPOSE, + m, n, k, &alpha, a, rsa, csa, b, rsb, csb, + &beta, c, rsc, csc ); + + bli_dprintm( "c: after gemm", m, n, c, rsc, csc, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( b ); + free( c ); + + + // + // Example 1b: Perform a general matrix-matrix multiply (gemm) operation + // with the left input operand (matrix A) transposed. + // + + printf( "\n#\n# -- Example 1b --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 4; n = 5; k = 3; + rsc = 1; csc = m; + rsa = 1; csa = k; + rsb = 1; csb = k; + c = static_cast(malloc( m * n * sizeof( double ) )); + a = static_cast(malloc( k * m * sizeof( double ) )); + b = static_cast(malloc( k * n * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + beta = 1.0; + + // Initialize the matrix operands. + bli_drandm( 0, BLIS_DENSE, k, m, a, rsa, csa ); + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + k, n, &one, b, rsb, csb ); + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &zero, c, rsc, csc ); + + bli_dprintm( "a: randomized", k, m, a, rsa, csa, "%4.1f", "" ); + bli_dprintm( "b: set to 1.0", k, n, b, rsb, csb, "%4.1f", "" ); + bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" ); + + // c := beta * c + alpha * a^T * b, where 'a', 'b', and 'c' are general. + bli_dgemm( BLIS_TRANSPOSE, BLIS_NO_TRANSPOSE, + m, n, k, &alpha, a, rsa, csa, b, rsb, csb, + &beta, c, rsc, csc ); + + bli_dprintm( "c: after gemm", m, n, c, rsc, csc, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( b ); + free( c ); + + + // + // Example 2: Perform a symmetric rank-k update (syrk) operation. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 5; k = 3; + rsc = 1; csc = m; + rsa = 1; csa = m; + c = static_cast(malloc( m * m * sizeof( double ) )); + a = static_cast(malloc( m * k * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + + // Initialize the matrix operands. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &zero, c, rsc, csc ); + bli_drandm( 0, BLIS_DENSE, m, k, a, rsa, csa ); + + // Randomize the lower triangle of 'c'. + bli_drandm( 0, BLIS_LOWER, m, n, c, rsc, csc ); + + bli_dprintm( "a: set to random values", m, k, a, rsa, csa, "%4.1f", "" ); + bli_dprintm( "c: initial value (zeros in upper triangle)", m, m, c, rsc, csc, "%4.1f", "" ); + + // c := c + alpha * a * a^T, where 'c' is symmetric and lower-stored. + bli_dsyrk( BLIS_LOWER, BLIS_NO_TRANSPOSE, + m, k, &alpha, a, rsa, csa, + &beta, c, rsc, csc ); + + bli_dprintm( "c: after syrk", m, m, c, rsc, csc, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( c ); + + + // + // Example 3: Perform a symmetric matrix-matrix multiply (symm) operation. + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 5; n = 6; + rsc = 1; csc = m; + rsa = 1; csa = m; + rsb = 1; csb = m; + c = static_cast(malloc( m * n * sizeof( double ) )); + a = static_cast(malloc( m * m * sizeof( double ) )); + b = static_cast(malloc( m * n * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + beta = 1.0; + + // Initialize matrices 'b' and 'c'. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &one, b, rsb, csb ); + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &zero, c, rsc, csc ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &zero, a, rsa, csa ); + + // Randomize the upper triangle of 'a'. + bli_drandm( 0, BLIS_UPPER, m, m, a, rsa, csa ); + + bli_dprintm( "a: randomized (zeros in lower triangle)", m, m, a, rsa, csa, "%4.1f", "" ); + bli_dprintm( "b: set to 1.0", m, n, b, rsb, csb, "%4.1f", "" ); + bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" ); + + // c := beta * c + alpha * a * b, where 'a' is symmetric and upper-stored. + bli_dsymm( BLIS_LEFT, BLIS_UPPER, BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, + m, n, &alpha, a, rsa, csa, b, rsb, csb, + &beta, c, rsc, csc ); + + bli_dprintm( "c: after symm", m, n, c, rsc, csc, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( b ); + free( c ); + + + // + // Example 4: Perform a triangular matrix-matrix multiply (trmm) operation. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 5; n = 4; + rsa = 1; csa = m; + rsb = 1; csb = m; + a = static_cast(malloc( m * m * sizeof( double ) )); + b = static_cast(malloc( m * n * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + + // Initialize matrix 'b'. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &one, b, rsb, csb ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &zero, a, rsa, csa ); + + // Randomize the lower triangle of 'a'. + bli_drandm( 0, BLIS_LOWER, m, m, a, rsa, csa ); + + bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rsa, csa, "%4.1f", "" ); + bli_dprintm( "b: initial value", m, n, b, rsb, csb, "%4.1f", "" ); + + // b := alpha * a * b, where 'a' is triangular and lower-stored. + bli_dtrmm( BLIS_LEFT, BLIS_LOWER, (trans_t)BLIS_NONUNIT_DIAG, (diag_t)BLIS_NO_TRANSPOSE, + m, n, &alpha, a, rsa, csa, b, rsb, csb ); + + bli_dprintm( "b: after trmm", m, n, b, rsb, csb, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( b ); + + + // + // Example 5: Perform a triangular solve with multiple right-hand sides + // (trsm) operation. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // Create some matrix and vector operands to work with. + m = 5; n = 4; + rsa = 1; csa = m; + rsb = 1; csb = m; + rsc = 1; csc = m; + a = static_cast(malloc( m * m * sizeof( double ) )); + b = static_cast(malloc( m * n * sizeof( double ) )); + c = static_cast(malloc( m * n * sizeof( double ) )); + + // Set the scalars to use. + alpha = 1.0; + + // Initialize matrix 'b'. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, &one, b, rsb, csb ); + + // Zero out all of matrix 'a'. This is optional, but will avoid possibly + // displaying junk values in the unstored triangle. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &zero, a, rsa, csa ); + + // Randomize the lower triangle of 'a'. + bli_drandm( 0, BLIS_LOWER, m, m, a, rsa, csa ); + + // Load the diagonal. By setting the diagonal to something of greater + // absolute value than the off-diagonal elements, we increase the odds + // that the matrix is not singular (singular matrices have no inverse). + bli_dshiftd( 0, m, m, &two, a, rsa, csa ); + + bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rsa, csa, "%4.1f", "" ); + bli_dprintm( "b: initial value", m, n, b, rsb, csb, "%4.1f", "" ); + + // solve a * x = alpha * b, where 'a' is triangular and lower-stored, and + // overwrite b with the solution matrix x. + bli_dtrsm( BLIS_LEFT, BLIS_LOWER, (trans_t)BLIS_NONUNIT_DIAG, (diag_t)BLIS_NO_TRANSPOSE, + m, n, &alpha, a, rsa, csa, b, rsb, csb ); + + bli_dprintm( "b: after trmm", m, n, b, rsb, csb, "%4.1f", "" ); + + // We can confirm the solution by comparing the product of a and x to the + // original value of b. + bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, + m, n, b, rsb, csb, c, rsc, csc ); + bli_dtrmm( BLIS_LEFT, BLIS_LOWER, (trans_t)BLIS_NONUNIT_DIAG, (diag_t)BLIS_NO_TRANSPOSE, + m, n, &alpha, a, rsa, csa, c, rsc, csc ); + + bli_dprintm( "c: should equal initial value of b", m, n, c, rsc, csc, "%4.1f", "" ); + + // Free the memory obtained via malloc(). + free( a ); + free( b ); + free( c ); + + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- +int main(int argc, char** argv) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/tapi/05util.cpp b/examples/hpx/tapi/05util.cpp new file mode 100644 index 0000000000..9b51207fb9 --- /dev/null +++ b/examples/hpx/tapi/05util.cpp @@ -0,0 +1,287 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +#include +#include + +int hpx_main( int argc, char** argv ) +{ + double* x; + dcomplex* y; + double* a; + dcomplex* b; + double* c; + double* d; + dcomplex* e; + dcomplex* f; + double* g; + double norm1, normi, normf; + dim_t m, n; + inc_t rs, cs; + + // Initialize some basic constants. + double minus_one = -1.0; + dcomplex minus_one_z = { -1.0, 0.0 }; + + + // + // This file demonstrates working with vector and matrices in the + // context of various utility operations. + // + + + // + // Example 1: Compute various vector norms. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // Create a few matrices to work with. + m = 1; n = 5; rs = 5; cs = 1; + x = static_cast(malloc( m * n * sizeof( double ) )); + y = static_cast(malloc( m * n * sizeof( dcomplex ) )); + + // Initialize the vectors to random values. + bli_drandv( n, x, 1 ); + bli_zrandv( n, y, 1 ); + + bli_dprintm( "x", m, n, x, rs, cs, "%4.1f", "" ); + + // Compute the one, infinity, and frobenius norms of 'x'. Note that when + // computing the norm alpha of a vector 'x', the datatype of alpha must be + // equal to the real projection of the datatype of 'x'. + bli_dnorm1v( n, x, 1, &norm1 ); + bli_dnormiv( n, x, 1, &normi ); + bli_dnormfv( n, x, 1, &normf ); + + bli_dprintm( "x: 1-norm:", 1, 1, &norm1, rs, cs, "%4.1f", "" ); + bli_dprintm( "x: infinity norm:", 1, 1, &normi, rs, cs, "%4.1f", "" ); + bli_dprintm( "x: frobenius norm:", 1, 1, &normf, rs, cs, "%4.1f", "" ); + + bli_zprintm( "y", m, n, y, rs, cs, "%4.1f", "" ); + + // Compute the one, infinity, and frobenius norms of 'y'. Note that we + // can reuse the same scalars from before for computing norms of + // dcomplex matrices, since the real projection of dcomplex is double. + bli_znorm1v( n, y, 1, &norm1 ); + bli_znormiv( n, y, 1, &normi ); + bli_znormfv( n, y, 1, &normf ); + + bli_dprintm( "y: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); + bli_dprintm( "y: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); + bli_dprintm( "y: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); + + + // + // Example 2: Compute various matrix norms. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Create a few matrices to work with. + m = 5; n = 6; rs = 1; cs = m; + a = static_cast(malloc( m * n * sizeof( double ) )); + b = static_cast(malloc( m * n * sizeof( dcomplex ) )); + + // Initialize the matrices to random values. + bli_drandm( 0, BLIS_DENSE, m, n, a, rs, cs ); + bli_zrandm( 0, BLIS_DENSE, m, n, b, rs, cs ); + + bli_dprintm( "a:", m, n, a, rs, cs, "%4.1f", "" ); + + // Compute the one-norm of 'a'. + bli_dnorm1m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, a, rs, cs, &norm1 ); + bli_dnormim( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, a, rs, cs, &normi ); + bli_dnormfm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, a, rs, cs, &normf ); + + bli_dprintm( "a: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); + bli_dprintm( "a: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); + bli_dprintm( "a: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); + + bli_zprintm( "b:", m, n, b, rs, cs, "%4.1f", "" ); + + // Compute the one-norm of 'b'. + bli_znorm1m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, b, rs, cs, &norm1 ); + bli_znormim( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, b, rs, cs, &normi ); + bli_znormfm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, n, b, rs, cs, &normf ); + + bli_dprintm( "a: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); + bli_dprintm( "a: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); + bli_dprintm( "a: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); + + + // + // Example 3: Make a real matrix explicitly symmetric (or Hermitian). + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // Create a few matrices to work with. + m = 4; n = 4; rs = 1; cs = m; + c = static_cast(malloc( m * m * sizeof( double ) )); + d = static_cast(malloc( m * m * sizeof( double ) )); + + // Initialize all of 'c' to -1.0 to simulate junk values. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &minus_one, c, rs, cs ); + + // Randomize the lower triangle of 'c'. + bli_drandm( 0, BLIS_LOWER, m, m, c, rs, cs ); + + bli_dprintm( "c (initial state):", m, m, c, rs, cs, "%4.1f", "" ); + + // mksymm on a real matrix transposes the stored triangle into the + // unstored triangle, making the matrix densely symmetric. + bli_dmksymm( BLIS_LOWER, m, c, rs, cs ); + + bli_dprintm( "c (after mksymm on lower triangle):", m, m, c, rs, cs, "%4.1f", "" ); + + // Digression: Most people think only of complex matrices as being able + // to be complex. However, in BLIS, we define Hermitian operations on + // real matrices, too--they are simply equivalent to the corresponding + // symmetric operation. For example, when we make a real matrix explicitly + // Hermitian, the result is indistinguishable from making it symmetric. + + // Initialize all of 'd' to -1.0 to simulate junk values. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &minus_one, d, rs, cs ); + + // Randomize the lower triangle of 'd'. + bli_drandm( 0, BLIS_LOWER, m, m, d, rs, cs ); + + bli_dprintm( "d (initial state):", m, m, d, rs, cs, "%4.1f", "" ); + + // mkherm on a real matrix behaves the same as mksymm, as there are no + // imaginary elements to conjugate. + bli_dmkherm( BLIS_LOWER, m, d, rs, cs ); + + bli_dprintm( "c (after mkherm on lower triangle):", m, m, d, rs, cs, "%4.1f", "" ); + + + // + // Example 4: Make a complex matrix explicitly symmetric or Hermitian. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create a few matrices to work with. + m = 4; n = 4; rs = 1; cs = m; + e = static_cast(malloc( m * m * sizeof( dcomplex ) )); + f = static_cast(malloc( m * m * sizeof( dcomplex ) )); + + // Initialize all of 'e' to -1.0 to simulate junk values. + bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &minus_one_z, e, rs, cs ); + + // Randomize the upper triangle of 'e'. + bli_zrandm( 0, BLIS_UPPER, m, m, e, rs, cs ); + + bli_zprintm( "e (initial state):", m, m, e, rs, cs, "%4.1f", "" ); + + // mksymm on a complex matrix transposes the stored triangle into the + // unstored triangle. + bli_zmksymm( BLIS_UPPER, m, e, rs, cs ); + + bli_zprintm( "e (after mksymm on lower triangle):", m, m, e, rs, cs, "%4.1f", "" ); + + // Initialize all of 'f' to -1.0 to simulate junk values. + bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &minus_one_z, f, rs, cs ); + + // Randomize the upper triangle of 'd'. + bli_zrandm( 0, BLIS_UPPER, m, m, f, rs, cs ); + + bli_zprintm( "f (initial state):", m, m, f, rs, cs, "%4.1f", "" ); + + // mkherm on a real matrix behaves the same as mksymm, as there are no + // imaginary elements to conjugate. + bli_zmkherm( BLIS_UPPER, m, f, rs, cs ); + + bli_zprintm( "f (after mkherm on lower triangle):", m, m, f, rs, cs, "%4.1f", "" ); + + + // + // Example 5: Make a real matrix explicitly triangular. + // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + // Create a few matrices to work with. + m = 5; n = 5; rs = 1; cs = m; + g = static_cast(malloc( m * m * sizeof( double ) )); + + // Initialize all of 'g' to -1.0 to simulate junk values. + bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, + m, m, &minus_one, g, rs, cs ); + + // Randomize the lower triangle of 'g'. + bli_drandm( 0, BLIS_LOWER, m, m, g, rs, cs ); + + bli_dprintm( "g (initial state):", m, m, g, rs, cs, "%4.1f", "" ); + + // mktrim does not explicitly copy any data, since presumably the stored + // triangle already contains the data of interest. However, mktrim does + // explicitly writes zeros to the unstored region. + bli_dmktrim( BLIS_LOWER, m, g, rs, cs ); + + bli_dprintm( "g (after mktrim):", m, m, g, rs, cs, "%4.1f", "" ); + + + // Free the memory obtained via malloc(). + free( x ); + free( y ); + free( a ); + free( b ); + free( c ); + free( d ); + free( e ); + free( f ); + free( g ); + + return hpx::finalize(); +} + +// ----------------------------------------------------------------------------- +int main(int argc, char ** argv) { + return hpx::init(argc, argv); +} diff --git a/examples/hpx/tapi/Makefile b/examples/hpx/tapi/Makefile new file mode 100644 index 0000000000..a6d6ed12a8 --- /dev/null +++ b/examples/hpx/tapi/Makefile @@ -0,0 +1,12 @@ +-include $(SHARE_PATH)/common.mk + +all: + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=00level1v -o 00level1v 00level1v.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=01level1m -o 01level1m 01level1m.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=02level1m_diag -o 02level1m_diag 02level1m_diag.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=03level2 -o 03level2 03level2.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=04level3 -o 04level3 04level3.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=05util -o 05util 05util.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis + +clean: + rm 00level1v 02level1m_diag 03level2 04level3 05util From f034e1a58f02c3c28d4cb1e2a763b8dd184b0b0f Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 19:47:57 -0500 Subject: [PATCH 09/21] initial import --- Makefile | 1 + build/bli_config.h.in | 1 + build/config.mk.in | 1 + common.mk | 1 + configure | 1 + frame/include/bli_type_defs.h | 1 + frame/thread/bli_thrcomm.h | 1 + frame/thread/bli_thrcomm_hpx.h | 1 + frame/thread/bli_thrcomm_hpx_impl.cpp | 2 +- frame/thread/bli_thrcomm_hpx_impl.hpp | 1 + frame/thread/bli_thread.c | 1 + frame/thread/bli_thread_hpx.h | 1 + frame/thread/bli_thread_hpx_impl.cpp | 1 + frame/thread/bli_thread_hpx_impl.hpp | 1 + 14 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 87356f9978..f7515ddd92 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2022, Advanced Micro Devices, Inc. +# Copyright (C) 2022 Tactical Computing Laboratories, LLC # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/build/bli_config.h.in b/build/bli_config.h.in index 716b6e22fc..0b9cba1412 100644 --- a/build/bli_config.h.in +++ b/build/bli_config.h.in @@ -6,6 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/build/config.mk.in b/build/config.mk.in index 4624220cf0..46d349add5 100644 --- a/build/config.mk.in +++ b/build/config.mk.in @@ -6,6 +6,7 @@ # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2022, Advanced Micro Devices, Inc. +# Copyright (C) 2022 Tactical Computing Laboratories, LLC # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/common.mk b/common.mk index 2b7d18b03f..db32e050df 100644 --- a/common.mk +++ b/common.mk @@ -5,6 +5,7 @@ # libraries. # # Copyright (C) 2014, The University of Texas at Austin +# Copyright (C) 2022 Tactical Computing Laboratories, LLC # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/configure b/configure index ee548c3eac..4f0209a157 100755 --- a/configure +++ b/configure @@ -6,6 +6,7 @@ # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2020-2022, Advanced Micro Devices, Inc. +# Copyright (C) 2022 Tactical Computing Laboratories, LLC # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 014be18b77..436536f3b8 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -7,6 +7,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thrcomm.h b/frame/thread/bli_thrcomm.h index 86b81e8c48..da9487b37f 100644 --- a/frame/thread/bli_thrcomm.h +++ b/frame/thread/bli_thrcomm.h @@ -6,6 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thrcomm_hpx.h b/frame/thread/bli_thrcomm_hpx.h index a39e1cbce6..73bcbcd1ef 100644 --- a/frame/thread/bli_thrcomm_hpx.h +++ b/frame/thread/bli_thrcomm_hpx.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thrcomm_hpx_impl.cpp b/frame/thread/bli_thrcomm_hpx_impl.cpp index 95591c855a..7268c52cbf 100644 --- a/frame/thread/bli_thrcomm_hpx_impl.cpp +++ b/frame/thread/bli_thrcomm_hpx_impl.cpp @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thrcomm_hpx_impl.hpp b/frame/thread/bli_thrcomm_hpx_impl.hpp index 36e3ab49ad..b12f54cdd2 100644 --- a/frame/thread/bli_thrcomm_hpx_impl.hpp +++ b/frame/thread/bli_thrcomm_hpx_impl.hpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thread.c b/frame/thread/bli_thread.c index 4a109138f9..c5df371086 100644 --- a/frame/thread/bli_thread.c +++ b/frame/thread/bli_thread.c @@ -6,6 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thread_hpx.h b/frame/thread/bli_thread_hpx.h index 98887c7927..336b16e65a 100644 --- a/frame/thread/bli_thread_hpx.h +++ b/frame/thread/bli_thread_hpx.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thread_hpx_impl.cpp b/frame/thread/bli_thread_hpx_impl.cpp index f1f4a36888..1922fcc664 100644 --- a/frame/thread/bli_thread_hpx_impl.cpp +++ b/frame/thread/bli_thread_hpx_impl.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thread_hpx_impl.hpp b/frame/thread/bli_thread_hpx_impl.hpp index a21ea466e8..b08e6c0d5b 100644 --- a/frame/thread/bli_thread_hpx_impl.hpp +++ b/frame/thread/bli_thread_hpx_impl.hpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are From 62b2f443a95f6a13748055bdf6c6dc57f2313787 Mon Sep 17 00:00:00 2001 From: ctaylor Date: Tue, 1 Nov 2022 19:55:33 -0500 Subject: [PATCH 10/21] initial import --- examples/hpx/oapi/00obj_basic.cpp | 1 + examples/hpx/oapi/01obj_attach.cpp | 1 + examples/hpx/oapi/02obj_ij.cpp | 1 + examples/hpx/oapi/03obj_view.cpp | 1 + examples/hpx/oapi/04level0.cpp | 1 + examples/hpx/oapi/05level1v.cpp | 1 + examples/hpx/oapi/06level1m.cpp | 1 + examples/hpx/oapi/07level1m_diag.cpp | 1 + examples/hpx/oapi/08level2.cpp | 1 + examples/hpx/oapi/09level3.cpp | 1 + examples/hpx/oapi/Makefile | 31 ++++++++++++++++++++++++++++ examples/hpx/tapi/00level1v.cpp | 1 + examples/hpx/tapi/01level1m.cpp | 1 + examples/hpx/tapi/02level1m_diag.cpp | 1 + examples/hpx/tapi/03level2.cpp | 1 + examples/hpx/tapi/04level3.cpp | 1 + examples/hpx/tapi/05util.cpp | 1 + examples/hpx/tapi/Makefile | 31 ++++++++++++++++++++++++++++ 18 files changed, 78 insertions(+) diff --git a/examples/hpx/oapi/00obj_basic.cpp b/examples/hpx/oapi/00obj_basic.cpp index 4c06ee5853..6f8b91a972 100644 --- a/examples/hpx/oapi/00obj_basic.cpp +++ b/examples/hpx/oapi/00obj_basic.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/01obj_attach.cpp b/examples/hpx/oapi/01obj_attach.cpp index 22a9c0113c..4c5304d6ef 100644 --- a/examples/hpx/oapi/01obj_attach.cpp +++ b/examples/hpx/oapi/01obj_attach.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/02obj_ij.cpp b/examples/hpx/oapi/02obj_ij.cpp index 96c9e411c0..7366b01660 100644 --- a/examples/hpx/oapi/02obj_ij.cpp +++ b/examples/hpx/oapi/02obj_ij.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/03obj_view.cpp b/examples/hpx/oapi/03obj_view.cpp index a751cb82bf..55d6588fbe 100644 --- a/examples/hpx/oapi/03obj_view.cpp +++ b/examples/hpx/oapi/03obj_view.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/04level0.cpp b/examples/hpx/oapi/04level0.cpp index e334b1a44f..60853fa685 100644 --- a/examples/hpx/oapi/04level0.cpp +++ b/examples/hpx/oapi/04level0.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/05level1v.cpp b/examples/hpx/oapi/05level1v.cpp index 4fcb0ed45e..197ffab374 100644 --- a/examples/hpx/oapi/05level1v.cpp +++ b/examples/hpx/oapi/05level1v.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/06level1m.cpp b/examples/hpx/oapi/06level1m.cpp index 039aa1ef2f..12832ce4f9 100644 --- a/examples/hpx/oapi/06level1m.cpp +++ b/examples/hpx/oapi/06level1m.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/07level1m_diag.cpp b/examples/hpx/oapi/07level1m_diag.cpp index ec656c58b4..5fb00fa188 100644 --- a/examples/hpx/oapi/07level1m_diag.cpp +++ b/examples/hpx/oapi/07level1m_diag.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/08level2.cpp b/examples/hpx/oapi/08level2.cpp index 659e10ddd8..b2d19d3fb0 100644 --- a/examples/hpx/oapi/08level2.cpp +++ b/examples/hpx/oapi/08level2.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/09level3.cpp b/examples/hpx/oapi/09level3.cpp index 7aad1a13a8..02b86366fa 100644 --- a/examples/hpx/oapi/09level3.cpp +++ b/examples/hpx/oapi/09level3.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/oapi/Makefile b/examples/hpx/oapi/Makefile index a8c587b066..2b8bf0d7d4 100644 --- a/examples/hpx/oapi/Makefile +++ b/examples/hpx/oapi/Makefile @@ -1,3 +1,34 @@ +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# Copyright (C) 2022 Tactical Computing Laboratories, LLC +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# -include $(SHARE_PATH)/common.mk all: diff --git a/examples/hpx/tapi/00level1v.cpp b/examples/hpx/tapi/00level1v.cpp index 83289c3274..2afed6d88d 100644 --- a/examples/hpx/tapi/00level1v.cpp +++ b/examples/hpx/tapi/00level1v.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/tapi/01level1m.cpp b/examples/hpx/tapi/01level1m.cpp index 8337a12edc..5c8ca8bb8f 100644 --- a/examples/hpx/tapi/01level1m.cpp +++ b/examples/hpx/tapi/01level1m.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/tapi/02level1m_diag.cpp b/examples/hpx/tapi/02level1m_diag.cpp index 482d86ea65..5ab11eec62 100644 --- a/examples/hpx/tapi/02level1m_diag.cpp +++ b/examples/hpx/tapi/02level1m_diag.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/tapi/03level2.cpp b/examples/hpx/tapi/03level2.cpp index bf20f5376b..bb7a715f79 100644 --- a/examples/hpx/tapi/03level2.cpp +++ b/examples/hpx/tapi/03level2.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/tapi/04level3.cpp b/examples/hpx/tapi/04level3.cpp index 13fd970cb1..8a3bb84131 100644 --- a/examples/hpx/tapi/04level3.cpp +++ b/examples/hpx/tapi/04level3.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/tapi/05util.cpp b/examples/hpx/tapi/05util.cpp index 9b51207fb9..8bce336fca 100644 --- a/examples/hpx/tapi/05util.cpp +++ b/examples/hpx/tapi/05util.cpp @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/examples/hpx/tapi/Makefile b/examples/hpx/tapi/Makefile index a6d6ed12a8..71aa82a1fe 100644 --- a/examples/hpx/tapi/Makefile +++ b/examples/hpx/tapi/Makefile @@ -1,3 +1,34 @@ +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# Copyright (C) 2022 Tactical Computing Laboratories, LLC +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# -include $(SHARE_PATH)/common.mk all: From c377074e7d8514866823f41a89e50b490b835997 Mon Sep 17 00:00:00 2001 From: ctaylor Date: Wed, 2 Nov 2022 08:49:53 -0500 Subject: [PATCH 11/21] initial import --- examples/hpx/tapi/03level2.cpp | 2 +- examples/hpx/tapi/Makefile | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/hpx/tapi/03level2.cpp b/examples/hpx/tapi/03level2.cpp index bb7a715f79..991dbb3e2e 100644 --- a/examples/hpx/tapi/03level2.cpp +++ b/examples/hpx/tapi/03level2.cpp @@ -314,7 +314,7 @@ int hpx_main( int argc, char** argv ) free( y ); - return 0; + return hpx::finalize(); } int main(int argc, char ** argv) { diff --git a/examples/hpx/tapi/Makefile b/examples/hpx/tapi/Makefile index 71aa82a1fe..67c129c2a1 100644 --- a/examples/hpx/tapi/Makefile +++ b/examples/hpx/tapi/Makefile @@ -32,12 +32,12 @@ -include $(SHARE_PATH)/common.mk all: - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=00level1v -o 00level1v 00level1v.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=01level1m -o 01level1m 01level1m.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=02level1m_diag -o 02level1m_diag 02level1m_diag.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=03level2 -o 03level2 03level2.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=04level3 -o 04level3 04level3.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application_debug` -DHPX_APPLICATION_NAME=05util -o 05util 05util.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application_debug` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=00level1v -o 00level1v 00level1v.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=01level1m -o 01level1m 01level1m.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=02level1m_diag -o 02level1m_diag 02level1m_diag.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=03level2 -o 03level2 03level2.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=04level3 -o 04level3 04level3.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis + $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=05util -o 05util 05util.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis clean: - rm 00level1v 02level1m_diag 03level2 04level3 05util + rm 00level1v 01level1m 02level1m_diag 03level2 04level3 05util From 802088f806be871699b035ac9f106e82ea0a17c5 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 4 Nov 2022 15:56:06 -0500 Subject: [PATCH 12/21] Remove hpx examples. They're not really necessary. An HPX user would need to add the hpx initialization and link the libraries in their own code in any case--the actual BLIS usage is unchanged from the "standard" examples. --- examples/hpx/oapi/00obj_basic.cpp | 253 ------------------- examples/hpx/oapi/01obj_attach.cpp | 183 -------------- examples/hpx/oapi/02obj_ij.cpp | 277 --------------------- examples/hpx/oapi/03obj_view.cpp | 286 ---------------------- examples/hpx/oapi/04level0.cpp | 189 --------------- examples/hpx/oapi/05level1v.cpp | 190 --------------- examples/hpx/oapi/06level1m.cpp | 236 ------------------ examples/hpx/oapi/07level1m_diag.cpp | 338 -------------------------- examples/hpx/oapi/08level2.cpp | 334 ------------------------- examples/hpx/oapi/09level3.cpp | 339 -------------------------- examples/hpx/oapi/Makefile | 48 ---- examples/hpx/tapi/00level1v.cpp | 192 --------------- examples/hpx/tapi/01level1m.cpp | 225 ----------------- examples/hpx/tapi/02level1m_diag.cpp | 252 ------------------- examples/hpx/tapi/03level2.cpp | 322 ------------------------ examples/hpx/tapi/04level3.cpp | 350 --------------------------- examples/hpx/tapi/05util.cpp | 288 ---------------------- examples/hpx/tapi/Makefile | 43 ---- 18 files changed, 4345 deletions(-) delete mode 100644 examples/hpx/oapi/00obj_basic.cpp delete mode 100644 examples/hpx/oapi/01obj_attach.cpp delete mode 100644 examples/hpx/oapi/02obj_ij.cpp delete mode 100644 examples/hpx/oapi/03obj_view.cpp delete mode 100644 examples/hpx/oapi/04level0.cpp delete mode 100644 examples/hpx/oapi/05level1v.cpp delete mode 100644 examples/hpx/oapi/06level1m.cpp delete mode 100644 examples/hpx/oapi/07level1m_diag.cpp delete mode 100644 examples/hpx/oapi/08level2.cpp delete mode 100644 examples/hpx/oapi/09level3.cpp delete mode 100644 examples/hpx/oapi/Makefile delete mode 100644 examples/hpx/tapi/00level1v.cpp delete mode 100644 examples/hpx/tapi/01level1m.cpp delete mode 100644 examples/hpx/tapi/02level1m_diag.cpp delete mode 100644 examples/hpx/tapi/03level2.cpp delete mode 100644 examples/hpx/tapi/04level3.cpp delete mode 100644 examples/hpx/tapi/05util.cpp delete mode 100644 examples/hpx/tapi/Makefile diff --git a/examples/hpx/oapi/00obj_basic.cpp b/examples/hpx/oapi/00obj_basic.cpp deleted file mode 100644 index 6f8b91a972..0000000000 --- a/examples/hpx/oapi/00obj_basic.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - obj_t a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11; - obj_t v1, v2; - num_t dt; - dim_t m, n; - inc_t rs, cs; - - - // - // This file demonstrates the basics of creating objects in BLIS, - // inspecting their basic properties, and printing matrix objects. - // - - - // - // Example 1: Create an object containing a 4x3 matrix of double- - // precision real elements stored in column-major order. - // - - // The matrix dimensions are m = 4 and n = 3. We choose to use column - // storage (often called column-major storage) and thus we specify - // that the row stride ("rs" for short) argument is 1 and the column - // stride ("cs" for short) argument is equal to m = 4. In column - // storage, cs is known as the leading dimension. - dt = BLIS_DOUBLE; m = 4; n = 3; - rs = 1; cs = 4; - bli_obj_create( dt, m, n, rs, cs, &a1 ); - - // If cs is greater than m, then extra rows (in this case, two) will - // be allocated beyond the lower edge of the matrix. Sometimes this - // is desireable for alignment purposes. - dt = BLIS_DOUBLE; m = 4; n = 3; - rs = 1; cs = 6; - bli_obj_create( dt, m, n, rs, cs, &a2 ); - - - // - // Example 2: Create an object containing a 4x3 matrix of double- - // precision real elements stored in row-major order. - // - - // Here, we choose to use row storage (often called row-major storage) - // and thus we specify that the cs is 1 and rs is equal to n = 3. In - // row storage, the leading dimension corresponds to rs. - dt = BLIS_DOUBLE; m = 4; n = 3; - rs = 3; cs = 1; - bli_obj_create( dt, m, n, rs, cs, &a3 ); - - // As with the second example, we can cause extra columns (in this - // case, five) to be allocated beyond the right edge of the matrix. - dt = BLIS_DOUBLE; m = 4; n = 3; - rs = 8; cs = 1; - bli_obj_create( dt, m, n, rs, cs, &a4 ); - - - // - // Example 3: Create objects using other floating-point datatypes. - // - - // Examples of using the other floating-point datatypes. - m = 4; n = 3; - rs = 1; cs = 4; - bli_obj_create( BLIS_FLOAT, m, n, rs, cs, &a5 ); - bli_obj_create( BLIS_SCOMPLEX, m, n, rs, cs, &a6 ); - bli_obj_create( BLIS_DCOMPLEX, m, n, rs, cs, &a7 ); - - - // - // Example 4: Create objects using default (column) storage so that - // we avoid having to specify rs and cs manually. - // - - // Specifying the row and column strides as zero, as is done here, is - // a shorthand request for the default storage scheme, which is - // currently (and always has been) column storage. When requesting the - // default storage scheme with rs = cs = 0, BLIS may insert additional - // padding for alignment purposes. So, the 3x8 matrix object created - // below may end up having a row stride that is greater than 3. When - // in doubt, query the value! - bli_obj_create( BLIS_FLOAT, 3, 5, 0, 0, &a8 ); - - - // - // Example 5: Inspect object fields after creation to expose - // possible alignment/padding. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // Let's inspect the amount of padding inserted for alignment. Note - // the difference between the m dimension and the column stride. - printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a8 ) ) ); - printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a8 ) ) ); - printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a8 ) ); - printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a8 ) ); - printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a8 ) ); - printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a8 ) ); - - - // - // Example 6: Inspect object fields after creation of other floating- - // point datatypes. - // - - printf( "\n#\n# -- Example 6 --\n#\n\n" ); - - bli_obj_create( BLIS_DOUBLE, 3, 5, 0, 0, &a9 ); - bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10); - bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 ); - - printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a9 ) ) ); - printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a9 ) ) ); - printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a9 ) ); - printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a9 ) ); - printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a9 ) ); - printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a9 ) ); - - printf( "\n" ); - printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a10 ) ) ); - printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a10 ) ) ); - printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a10 ) ); - printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a10 ) ); - printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a10 ) ); - printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a10 ) ); - - printf( "\n" ); - printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a11 ) ) ); - printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a11 ) ) ); - printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a11 ) ); - printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a11 ) ); - printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a11 ) ); - printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a11 ) ); - - - // - // Example 7: Initialize an object's elements to random values and then - // print the matrix. - // - - printf( "\n#\n# -- Example 7 --\n#\n\n" ); - - // We can set matrices to random values. The default behavior of - // bli_randm() is to use random values on the internval [-1,1]. - bli_randm( &a9 ); - - // And we can also print the matrices associated with matrix objects. - // Notice that the third argument is a printf()-style format specifier. - // Any valid printf() format specifier can be passed in here, but you - // still need to make sure that the specifier makes sense for the data - // being printed. For example, you shouldn't use "%d" when printing - // elements of type 'float'. - bli_printm( "matrix 'a9' contents:", &a9, "%4.1f", "" ); - - - // - // Example 8: Randomize and then print from an object containing a complex - // matrix. - // - - printf( "\n#\n# -- Example 8 --\n#\n\n" ); - - // When printing complex matrices, the same format specifier gets used - // for both the real and imaginary parts. - bli_randm( &a11 ); - bli_printm( "matrix 'a11' contents (complex):", &a11, "%4.1f", "" ); - - - // - // Example 9: Create, randomize, and print vector objects. - // - - printf( "\n#\n# -- Example 9 --\n#\n\n" ); - - // Now let's create two vector objects--a row vector and a column vector. - // (A vector object is like a matrix object, except that it has at least - // one unit dimension (equal to one). - bli_obj_create( BLIS_DOUBLE, 4, 1, 0, 0, &v1 ); - bli_obj_create( BLIS_DOUBLE, 1, 6, 0, 0, &v2 ); - - // If we know the object is a vector, we can use bli_randv(), though - // bli_randm() would work just as well, since any vector is also a matrix. - bli_randv( &v1 ); - bli_randv( &v2 ); - - // We can print vectors, too. - bli_printm( "vector 'v1' contents:", &v1, "%5.1f", "" ); - bli_printm( "vector 'v2' contents:", &v2, "%5.1f", "" ); - - - // Free all of the objects we created. - bli_obj_free( &a1 ); - bli_obj_free( &a2 ); - bli_obj_free( &a3 ); - bli_obj_free( &a4 ); - bli_obj_free( &a5 ); - bli_obj_free( &a6 ); - bli_obj_free( &a7 ); - bli_obj_free( &a8 ); - bli_obj_free( &a9 ); - bli_obj_free( &a10 ); - bli_obj_free( &a11 ); - bli_obj_free( &v1 ); - bli_obj_free( &v2 ); - - return hpx::finalize(); -} - -int main(int argc, char ** argv){ - return hpx::init(argc, argv); -} - diff --git a/examples/hpx/oapi/01obj_attach.cpp b/examples/hpx/oapi/01obj_attach.cpp deleted file mode 100644 index 4c5304d6ef..0000000000 --- a/examples/hpx/oapi/01obj_attach.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include -#include "blis.h" - -#include -#include - -void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); -void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); - -int hpx_main( int argc, char** argv ) -{ - obj_t a1, a2; - num_t dt; - dim_t m, n; - inc_t rs, cs; - - - // - // This file demonstrates interfacing external or existing buffers - // with BLIS objects. - // - - - // - // Example 1: Create a bufferless object and then attach an external - // buffer to it, specifying column storage. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // We'll use these parameters for the following examples. - dt = BLIS_DOUBLE; - m = 4; n = 5; rs = 1; cs = m; - - // First we allocate and initialize a matrix by columns. - double* p1 = static_cast(malloc( m * n * sizeof( double ) )); - init_dmatrix_by_cols( m, n, p1, rs, cs ); - - // bli_obj_create() automatically allocates an array large enough to hold - // of the elements. We can also create a "bufferless" object and then - // "attach" our own buffer to that object. This is useful when interfacing - // BLIS objects to an existing application that produces its own matrix - // arrays/buffers. - bli_obj_create_without_buffer( dt, m, n, &a1 ); - - // Note that the fourth argument of bli_obj_attach_buffer() is the so-called - // "imaginary stride". First of all, this stride only has meaning in the - // complex domain. Secondly, it is a somewhat experimental property of the - // obj_t, and one that is not fully recognized/utilized throughout BLIS. - // Thus, the safe thing to do is to always pass in a 0, which is a request - // for the default (which is actually 1). Please don't use any other value - // unless you really know what you are doing. - bli_obj_attach_buffer( p1, rs, cs, 0, &a1 ); - - // Now let's print the matrix so we can see how the element values were - // assigned. - bli_printm( "matrix 'a1', initialized by columns:", &a1, "%5.1f", "" ); - - - // - // Example 2: Create a bufferless object and then attach an external - // buffer to it, specifying row storage. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Now let's allocate another buffer, but this time we'll initialize it by - // rows instead of by columns. We'll use the same values for m, n, rs, cs. - double* p2 = static_cast(malloc( m * n * sizeof( double ) )); - init_dmatrix_by_rows( m, n, p2, rs, cs ); - - // Create a new bufferless object and attach the new buffer. This time, - // instead of calling bli_obj_create_without_buffer() followed by - // bli_obj_attach_buffer(), we call bli_obj_create_with_attached_buffer(), - // which is just a convenience wrapper around the former two functions. - // (Note that the wrapper function omits the imaginary stride argument.) -#if 1 - bli_obj_create_with_attached_buffer( dt, m, n, p2, rs, cs, &a2 ); -#else - bli_obj_create_without_buffer( dt, m, n, &a2 ); - bli_obj_attach_buffer( p2, rs, cs, 0, &a2 ); -#endif - - // Print the matrix so we can compare it to the first matrix output. - bli_printm( "matrix 'a2', initialized by rows:", &a2, "%5.1f", "" ); - - // Please note that after creating an object via either of: - // - bli_obj_create_without_buffer(), or - // - bli_obj_create_with_attached_buffer() - // we do NOT free it! That's because these functions merely initialize the - // object and do not actually allocate any memory. - - - // Free the memory arrays we allocated. - free( p1 ); - free( p2 ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- - -void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) -{ - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by rows, assigning each element a unique - // value, starting at 0. - for ( i = 0; i < m; ++i ) - { - for ( j = 0; j < n; ++j ) - { - double* a_ij = a + i*rs + j*cs; - - *a_ij = alpha; - - alpha += 1.0; - } - } -} - -void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) -{ - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by columns, assigning each element a unique - // value, starting at 0. - for ( j = 0; j < n; ++j ) - { - for ( i = 0; i < m; ++i ) - { - double* a_ij = a + i*rs + j*cs; - - *a_ij = alpha; - - alpha += 1.0; - } - } -} - -int main(int argc, char** argv) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/02obj_ij.cpp b/examples/hpx/oapi/02obj_ij.cpp deleted file mode 100644 index 7366b01660..0000000000 --- a/examples/hpx/oapi/02obj_ij.cpp +++ /dev/null @@ -1,277 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include -#include "blis.h" - -#include -#include - -void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); -void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); -void init_dobj_by_cols( obj_t* a ); -void init_zobj_by_cols( obj_t* a ); - -int hpx_main( int argc, char** argv ) -{ - obj_t a1, a2, a3; - num_t dt; - dim_t m, n; - inc_t rs, cs; - dim_t i, j; - - - // - // This file demonstrates accessing and updating individual matrix elements - // through the BLIS object API. - // - - - // - // Example 1: Create an object and then individually access/view some of - // its elements. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // We'll use these parameters for the following examples. - dt = BLIS_DOUBLE; - m = 4; n = 5; rs = 1; cs = m; - - // Create a object with known elements using the same approach as the - // previous example file. - double* p1 = static_cast(malloc( m * n * sizeof( double ) )); - init_dmatrix_by_cols( m, n, p1, rs, cs ); - bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a1 ); - - bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); - - // Regardless of how we create our object--whether via bli_obj_create() or - // via attaching an existing buffer to a bufferless object--we can access - // individual elements by specifying their offsets. The output value is - // broken up by real and imaginary component. (When accessing real matrices, - // the imaginary component will always be zero.) - i = 1; j = 3; - double alpha_r, alpha_i; - bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); - - // Here, we print out the element "returned" by bli_getijm(). - printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); - - // Let's query a few more elements. - i = 0; j = 2; - bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); - - printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); - - i = 3; j = 4; - bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); - - printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); - - printf( "\n" ); - - - // - // Example 2: Modify individual elements of an existing matrix. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Now let's change a few elements. Even if we set the imaginary - // argument to a non-zero value, argument is ignored since we're - // modifying a real matrix. If a1 were a complex object, those - // values would be stored verbatim into the appropriate matrix - // elements (see example for a3 below). - alpha_r = -3.0; alpha_i = 0.0; i = 1; j = 3; - bli_setijm( alpha_r, alpha_i, i, j, &a1 ); - - alpha_r = -9.0; alpha_i = -1.0; i = 0; j = 2; - bli_setijm( alpha_r, alpha_i, i, j, &a1 ); - - alpha_r = -7.0; alpha_i = 2.0; i = 3; j = 4; - bli_setijm( alpha_r, alpha_i, i, j, &a1 ); - - // Print the matrix again so we can see the update elements. - bli_printm( "matrix 'a1' (modified state)", &a1, "%5.1f", "" ); - - // Next, let's create a regular object (with a buffer) and then - // initialize its elements using bli_setijm(). - bli_obj_create( dt, m, n, rs, cs, &a2 ); - - // See definition of init_dobj_by_cols() below. - init_dobj_by_cols( &a2 ); - - // Because we initialized a2 in the same manner as a1 (by columns), - // it should contain the same initial state as a1. - bli_printm( "matrix 'a2'", &a2, "%5.1f", "" ); - - - // - // Example 3: Modify individual elements of an existing complex matrix. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Create and initialize a complex object. - dt = BLIS_DCOMPLEX; - bli_obj_create( dt, m, n, rs, cs, &a3 ); - - // Initialize the matrix elements. (See definition of init_dobj_by_cols() - // below). - init_zobj_by_cols( &a3 ); - - // Print the complex matrix. - bli_printm( "matrix 'a3' (initial state)", &a3, "%5.1f", "" ); - - i = 3; j = 0; - bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); - alpha_r *= -1.0; alpha_i *= -1.0; - bli_setijm( alpha_r, alpha_i, i, j, &a3 ); - - i = 3; j = 4; - bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); - alpha_r *= -1.0; alpha_i *= -1.0; - bli_setijm( alpha_r, alpha_i, i, j, &a3 ); - - i = 0; j = 4; - bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); - alpha_r *= -1.0; alpha_i *= -1.0; - bli_setijm( alpha_r, alpha_i, i, j, &a3 ); - - // Print the matrix again so we can see the update elements. - bli_printm( "matrix 'a3' (modified state)", &a3, "%5.1f", "" ); - - // Free the memory arrays we allocated. - free( p1 ); - - - // Free the objects we created. - bli_obj_free( &a2 ); - bli_obj_free( &a3 ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- - -void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) -{ - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by rows, assigning each element a unique - // value, starting at 0. - for ( i = 0; i < m; ++i ) - { - for ( j = 0; j < n; ++j ) - { - double* a_ij = a + i*rs + j*cs; - - *a_ij = alpha; - - alpha += 1.0; - } - } -} - -void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) -{ - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by columns, assigning each element a unique - // value, starting at 0. - for ( j = 0; j < n; ++j ) - { - for ( i = 0; i < m; ++i ) - { - double* a_ij = a + i*rs + j*cs; - - *a_ij = alpha; - - alpha += 1.0; - } - } -} - -void init_dobj_by_cols( obj_t* a ) -{ - dim_t m = bli_obj_length( a ); - dim_t n = bli_obj_width( a ); - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by columns, assigning each element a unique - // value, starting at 0. - for ( j = 0; j < n; ++j ) - { - for ( i = 0; i < m; ++i ) - { - bli_setijm( alpha, 0.0, i, j, a ); - - alpha += 1.0; - } - } -} - -void init_zobj_by_cols( obj_t* a ) -{ - dim_t m = bli_obj_length( a ); - dim_t n = bli_obj_width( a ); - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by columns, assigning each real and imaginary - // element a unique value, starting at 0. - for ( j = 0; j < n; ++j ) - { - for ( i = 0; i < m; ++i ) - { - bli_setijm( alpha, alpha + 1.0, i, j, a ); - - alpha += 2.0; - } - } -} - -int main(int argc, char ** argv) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/03obj_view.cpp b/examples/hpx/oapi/03obj_view.cpp deleted file mode 100644 index 55d6588fbe..0000000000 --- a/examples/hpx/oapi/03obj_view.cpp +++ /dev/null @@ -1,286 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include -#include "blis.h" - -#include -#include - -void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); -void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); -void init_dobj_by_cols( obj_t* a ); -void init_zobj_by_cols( obj_t* a ); - -int hpx_main( int argc, char** argv ) -{ - obj_t a1, a2; - obj_t v1, v2, v3, v4, v5; - num_t dt; - dim_t m, n; - inc_t rs, cs; - dim_t i, j; - dim_t mv, nv; - - - // - // This file demonstrates creating and submatrix views into existing matrices. - // - - - // - // Example 1: Create an object and then create a submatrix view. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // We'll use these parameters for the following examples. - dt = BLIS_DOUBLE; - m = 6; n = 7; rs = 1; cs = m; - - // Create an object a1 using bli_obj_create(). - bli_obj_create( dt, m, n, rs, cs, &a1 ); - - // Initialize a1 to contain known values. - init_dobj_by_cols( &a1 ); - - bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); - - // Acquire a 4x3 submatrix view into a1 at (i,j) offsets (1,2). - i = 1; j = 2; mv = 4; nv = 3; - bli_acquire_mpart( i, j, mv, nv, &a1, &v1 ); - - bli_printm( "4x3 submatrix 'v1' at offsets (1,2)", &v1, "%5.1f", "" ); - - // NOTE: Submatrix views should never be passed to bli_obj_free(). It - // will not cause an immediate error, but it is bad practice. Instead, - // you should only release the objects that were created directy via - // bli_obj_create(). In the above example, that means only object a1 - // would be passed to bli_obj_free(). - - - // - // Example 2: Modify the contents of a submatrix view. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Modify the first three elements of the first column. - bli_setijm( -3.0, 0.0, 0, 0, &v1 ); - bli_setijm( -4.0, 0.0, 1, 0, &v1 ); - bli_setijm( -5.0, 0.0, 2, 0, &v1 ); - - // Modify the first three elements of the second column. - bli_setijm( -6.0, 0.0, 0, 1, &v1 ); - bli_setijm( -7.0, 0.0, 1, 1, &v1 ); - bli_setijm( -8.0, 0.0, 2, 1, &v1 ); - - // Print the matrix again so we can see the update elements. - bli_printm( "submatrix view 'v1' (modified state)", &v1, "%5.1f", "" ); - bli_printm( "matrix 'a1' (indirectly modified due to changes to 'v1')", &a1, "%5.1f", "" ); - - - // - // Example 3: Create a submatrix view that is "too big". - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // bli_acquire_mpart() will safely truncate your requested submatrix - // view dimensions (or even the offsets) if they extend beyond the - // bounds of the parent object. - - bli_printm( "matrix 'a1' (current state)", &a1, "%5.1f", "" ); - - // Acquire a 4x3 submatrix view into a1 at offsets (4,2). Notice how - // the requested view contains four rows, but the view is created with - // only two rows because the starting m offset of 4 leaves only two rows - // left in the parent matrix. - bli_acquire_mpart( 4, 2, 4, 3, &a1, &v2 ); - - bli_printm( "4x3 submatrix 'v2' at offsets (4,2) -- two rows truncated for safety", &v2, "%5.1f", "" ); - - - // - // Example 4: Create a bufferless object, attach an external buffer, and - // then create a submatrix view. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create a object with known elements using the same approach as the - // previous example file. - double* p1 = static_cast(malloc( m * n * sizeof( double ) )); - init_dmatrix_by_cols( m, n, p1, rs, cs ); - bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a2 ); - - bli_printm( "matrix 'a2' (initial state)", &a2, "%5.1f", "" ); - - // Acquire a 3x4 submatrix view at offset (2,3). - bli_acquire_mpart( 2, 3, 3, 4, &a2, &v3 ); - - bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); - - - // - // Example 5: Use a submatrix view to set a region of a larger matrix to - // zero. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); - - bli_setm( &BLIS_ZERO, &v3 ); - - bli_printm( "3x4 submatrix view 'v3' (zeroed out)", &v3, "%5.1f", "" ); - - bli_printm( "matrix 'a2' (modified state)", &a2, "%5.1f", "" ); - - - // - // Example 6: Obtain a submatrix view into a submatrix view. - // - - printf( "\n#\n# -- Example 6 --\n#\n\n" ); - - bli_acquire_mpart( 1, 1, 5, 6, &a2, &v4 ); - - bli_printm( "5x6 submatrix view 'v4' at offsets (1,1) of 'a2'", &v4, "%5.1f", "" ); - - bli_acquire_mpart( 1, 0, 4, 5, &v4, &v5 ); - - bli_printm( "4x5 submatrix view 'v5' at offsets (1,0) of 'v4'", &v5, "%5.1f", "" ); - - - // Free the memory arrays we allocated. - free( p1 ); - - // Free the objects we created. - bli_obj_free( &a1 ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- - -void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) -{ - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by rows, assigning each element a unique - // value, starting at 0. - for ( i = 0; i < m; ++i ) - { - for ( j = 0; j < n; ++j ) - { - double* a_ij = a + i*rs + j*cs; - - *a_ij = alpha; - - alpha += 1.0; - } - } -} - -void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) -{ - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by columns, assigning each element a unique - // value, starting at 0. - for ( j = 0; j < n; ++j ) - { - for ( i = 0; i < m; ++i ) - { - double* a_ij = a + i*rs + j*cs; - - *a_ij = alpha; - - alpha += 1.0; - } - } -} - -void init_dobj_by_cols( obj_t* a ) -{ - dim_t m = bli_obj_length( a ); - dim_t n = bli_obj_width( a ); - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by columns, assigning each element a unique - // value, starting at 0. - for ( j = 0; j < n; ++j ) - { - for ( i = 0; i < m; ++i ) - { - bli_setijm( alpha, 0.0, i, j, a ); - - alpha += 1.0; - } - } -} - -void init_zobj_by_cols( obj_t* a ) -{ - dim_t m = bli_obj_length( a ); - dim_t n = bli_obj_width( a ); - dim_t i, j; - - double alpha = 0.0; - - // Step through a matrix by columns, assigning each real and imaginary - // element a unique value, starting at 0. - for ( j = 0; j < n; ++j ) - { - for ( i = 0; i < m; ++i ) - { - bli_setijm( alpha, alpha + 1.0, i, j, a ); - - alpha += 2.0; - } - } -} - -int main( int argc, char** argv ) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/04level0.cpp b/examples/hpx/oapi/04level0.cpp deleted file mode 100644 index 60853fa685..0000000000 --- a/examples/hpx/oapi/04level0.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - obj_t alpha, beta, gamma, kappa, zeta; - num_t dt; - double gamma_d; - - - // - // This file demonstrates working with scalar objects. - // - - - // - // Example 1: Create a scalar (1x1) object. - // - - dt = BLIS_DOUBLE; - - // The easiest way to create a scalar object is with the following - // convenience function. - bli_obj_create_1x1( dt, &alpha ); - - // We could, of course, create an object using our more general-purpose - // function, using m = n = 1. - bli_obj_create( dt, 1, 1, 0, 0, &beta ); - - // We can even attach an external scalar. This function, unlike - // bli_obj_create_1x1() and bli_obj_create(), does not result in any - // memory allocation. - bli_obj_create_1x1_with_attached_buffer( dt, &gamma_d, &gamma ); - - // There is one more way to create an object. Like the previous method, - // it also avoids memory allocation by referencing a special "internal" - // scalar that is invisibly part of every object. - bli_obj_scalar_init_detached( dt, &kappa ); - - // Digression: In the most common cases, there is no need to create scalar - // objects to begin with. That's because BLIS comes with three ready-to-use - // globally-scoped scalar objects: - // - // obj_t BLIS_MINUS_ONE; - // obj_t BLIS_ZERO; - // obj_t BLIS ONE; - // - // Each of these special objects is provided by blis.h. They can be used - // wherever a scalar object is expected as an input operand regardless of - // the datatype of your other operands. Note that you should never try to - // modify these global scalar objects directly, nor should you ever try to - // perform an operation *on* the objects (that is, you should never try to - // update their values, though you can always perform operations *with* - // them--that's the whole point!). - - - // - // Example 2: Set the value of an existing scalar object. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Once you've created an object, you can set its value via setsc. As with - // setijm, setsc takes a real and imaginary value, but you can ignore the - // imaginary argument if your object is real. And even if you pass in a - // non-zero value, it is ignored for real objects. - bli_setsc( -4.0, 0.0, &alpha ); - bli_setsc( 3.0, 1.0, &beta ); - bli_setsc( 0.5, 0.0, &kappa ); - bli_setsc( 10.0, 0.0, &gamma ); - - // BLIS does not have a special print function for scalars, but since a - // 1x1 is also a vector and a matrix, we can use printv or printm. - bli_printm( "alpha:", &alpha, "%4.1f", "" ); - bli_printm( "beta:", &beta, "%4.1f", "" ); - bli_printm( "kappa:", &kappa, "%4.1f", "" ); - bli_printm( "gamma:", &gamma, "%4.1f", "" ); - - - // - // Example 3: Create and set the value of a complex scalar object. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Create one more scalar, this time a complex scalar, to show how it - // can be used. - bli_obj_create_1x1( BLIS_DCOMPLEX, &zeta ); - bli_setsc( 3.3, -4.4, &zeta ); - bli_printm( "zeta (complex):", &zeta, "%4.1f", "" ); - - - // - // Example 4: Copy scalar objects. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // We can copy scalars amongst one another, and we can use the global - // scalar constants for input operands. - bli_copysc( &beta, &gamma ); - bli_printm( "gamma (overwritten with beta):", &gamma, "%4.1f", "" ); - - bli_copysc( &BLIS_ONE, &gamma ); - bli_printm( "gamma (overwritten with BLIS_ONE):", &gamma, "%4.1f", "" ); - - - // - // Example 5: Perform other operations on scalar objects. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // BLIS defines a range of basic floating-point operations on scalars. - bli_addsc( &beta, &gamma ); - bli_printm( "gamma := gamma + beta", &gamma, "%4.1f", "" ); - - bli_subsc( &alpha, &gamma ); - bli_printm( "gamma := gamma - alpha", &gamma, "%4.1f", "" ); - - bli_divsc( &kappa, &gamma ); - bli_printm( "gamma := gamma / kappa", &gamma, "%4.1f", "" ); - - bli_sqrtsc( &gamma, &gamma ); - bli_printm( "gamma := sqrt( gamma )", &gamma, "%4.1f", "" ); - - bli_normfsc( &alpha, &alpha ); - bli_printm( "alpha := normf( alpha ) # normf() = abs() in real domain.", &alpha, "%4.1f", "" ); - - // Note that normfsc() allows complex input objects, but requires that the - // output operand (the second operand) be a real object. - bli_normfsc( &zeta, &alpha ); - bli_printm( "alpha := normf( zeta ) # normf() = complex modulus in complex domain.", &alpha, "%4.1f", "" ); - - bli_invertsc( &gamma ); - bli_printm( "gamma := 1.0 / gamma", &gamma, "%4.2f", "" ); - - - // Only free the objects that resulted in actual allocation. - bli_obj_free( &alpha ); - bli_obj_free( &beta ); - bli_obj_free( &zeta ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- - -int main( int argc, char** argv ) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/05level1v.cpp b/examples/hpx/oapi/05level1v.cpp deleted file mode 100644 index 197ffab374..0000000000 --- a/examples/hpx/oapi/05level1v.cpp +++ /dev/null @@ -1,190 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - obj_t alpha, beta, gamma; - obj_t x, y, z, w, a; - num_t dt; - dim_t m, n; - inc_t rs, cs; - - - // - // This file demonstrates working with vector objects and the level-1v - // operations. - // - - - // - // Example 1: Create vector objects and then broadcast (copy) scalar - // values to all elements. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create a few vectors to work with. We make them all of the same length - // so that we can perform operations between them. - // NOTE: We've chosen to use row vectors here (1x4) instead of column - // vectors (4x1) to allow for easier reading of standard output (less - // scrolling). - dt = BLIS_DOUBLE; - m = 1; n = 4; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &x ); - bli_obj_create( dt, m, n, rs, cs, &y ); - bli_obj_create( dt, m, n, rs, cs, &z ); - bli_obj_create( dt, m, n, rs, cs, &w ); - bli_obj_create( dt, m, n, rs, cs, &a ); - - // Let's also create and initialize some scalar objects. - bli_obj_create_1x1( dt, &alpha ); - bli_obj_create_1x1( dt, &beta ); - bli_obj_create_1x1( dt, &gamma ); - - bli_setsc( 2.0, 0.0, &alpha ); - bli_setsc( 0.2, 0.0, &beta ); - bli_setsc( 3.0, 0.0, &gamma ); - - bli_printm( "alpha:", &alpha, "%4.1f", "" ); - bli_printm( "beta:", &beta, "%4.1f", "" ); - bli_printm( "gamma:", &gamma, "%4.1f", "" ); - - // Vectors can set by "broadcasting" a constant to every element. - bli_setv( &BLIS_ONE, &x ); - bli_setv( &alpha, &y ); - bli_setv( &BLIS_ZERO, &z ); - - // Note that we can use printv or printm to print vectors since vectors - // are also matrices. We choose to use printm because it honors the - // orientation of the vector (row or column) when printing, whereas - // printv always prints vectors as column vectors regardless of their - // they are 1 x n or n x 1. - bli_printm( "x := 1.0", &x, "%4.1f", "" ); - bli_printm( "y := alpha", &y, "%4.1f", "" ); - bli_printm( "z := 0.0", &z, "%4.1f", "" ); - - - // - // Example 2: Randomize a vector object. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Set a vector to random values. - bli_randv( &w ); - - bli_printm( "w := randv()", &w, "%4.1f", "" ); - - - // - // Example 3: Perform various element-wise operations on vector objects. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Copy a vector. - bli_copyv( &w, &a ); - bli_printm( "a := w", &a, "%4.1f", "" ); - - // Add and subtract vectors. - bli_addv( &y, &a ); - bli_printm( "a := a + y", &a, "%4.1f", "" ); - - bli_subv( &w, &a ); - bli_printm( "a := a - w", &a, "%4.1f", "" ); - - // Scale a vector (destructive). - bli_scalv( &beta, &a ); - bli_printm( "a := beta * a", &a, "%4.1f", "" ); - - // Scale a vector (non-destructive). - bli_scal2v( &gamma, &a, &z ); - bli_printm( "z := gamma * a", &z, "%4.1f", "" ); - - // Scale and accumulate between vectors. - bli_axpyv( &alpha, &w, &x ); - bli_printm( "x := x + alpha * w", &x, "%4.1f", "" ); - - bli_xpbyv( &w, &BLIS_MINUS_ONE, &x ); - bli_printm( "x := -1.0 * x + w", &x, "%4.1f", "" ); - - // Invert a vector element-wise. - bli_invertv( &y ); - bli_printm( "y := 1 / y", &y, "%4.1f", "" ); - - // Swap two vectors. - bli_swapv( &x, &y ); - bli_printm( "x (after swapping with y)", &x, "%4.1f", "" ); - bli_printm( "y (after swapping with x)", &y, "%4.1f", "" ); - - - // - // Example 4: Perform contraction-like operations on vector objects. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Perform a dot product. - bli_dotv( &a, &z, &gamma ); - bli_printm( "gamma := a * z (dot product)", &gamma, "%5.2f", "" ); - - // Perform an extended dot product. - bli_dotxv( &alpha, &a, &z, &BLIS_ONE, &gamma ); - bli_printm( "gamma := 1.0 * gamma + alpha * a * z (accumulate scaled dot product)", &gamma, "%5.2f", "" ); - - - // Free the objects. - bli_obj_free( &alpha ); - bli_obj_free( &beta ); - bli_obj_free( &gamma ); - bli_obj_free( &x ); - bli_obj_free( &y ); - bli_obj_free( &z ); - bli_obj_free( &w ); - bli_obj_free( &a ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- -int main( int argc, char** argv ) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/06level1m.cpp b/examples/hpx/oapi/06level1m.cpp deleted file mode 100644 index 12832ce4f9..0000000000 --- a/examples/hpx/oapi/06level1m.cpp +++ /dev/null @@ -1,236 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - obj_t alpha, beta, gamma; - obj_t a, b, c, d, e, f, g, h; - num_t dt; - dim_t m, n; - inc_t rs, cs; - - - // - // This file demonstrates working with matrix objects and the level-1m - // operations. - // - - - // - // Example 1: Create matrix objects and then broadcast (copy) scalar - // values to all elements. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create a few matrices to work with. We make them all of the same - // dimensions so that we can perform operations between them. - dt = BLIS_DOUBLE; - m = 2; n = 3; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &a ); - bli_obj_create( dt, m, n, rs, cs, &b ); - bli_obj_create( dt, m, n, rs, cs, &c ); - bli_obj_create( dt, m, n, rs, cs, &d ); - bli_obj_create( dt, m, n, rs, cs, &e ); - - // Let's also create and initialize some scalar objects. - bli_obj_create_1x1( dt, &alpha ); - bli_obj_create_1x1( dt, &beta ); - bli_obj_create_1x1( dt, &gamma ); - - bli_setsc( 2.0, 0.0, &alpha ); - bli_setsc( 0.2, 0.0, &beta ); - bli_setsc( 3.0, 0.0, &gamma ); - - bli_printm( "alpha:", &alpha, "%4.1f", "" ); - bli_printm( "beta:", &beta, "%4.1f", "" ); - bli_printm( "gamma:", &gamma, "%4.1f", "" ); - - // Matrices, like vectors, can set by "broadcasting" a constant to every - // element. - bli_setm( &BLIS_ONE, &a ); - bli_setm( &alpha, &b ); - bli_setm( &BLIS_ZERO, &c ); - - bli_printm( "a := 1.0", &a, "%4.1f", "" ); - bli_printm( "b := alpha", &b, "%4.1f", "" ); - bli_printm( "c := 0.0", &c, "%4.1f", "" ); - - - // - // Example 2: Randomize a matrix object. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Set a matrix to random values. - bli_randm( &e ); - - bli_printm( "e (randomized):", &e, "%4.1f", "" ); - - - // - // Example 3: Perform element-wise operations on matrices. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Copy a matrix. - bli_copym( &e, &d ); - bli_printm( "d := e", &d, "%4.1f", "" ); - - // Add and subtract vectors. - bli_addm( &a, &d ); - bli_printm( "d := d + a", &d, "%4.1f", "" ); - - bli_subm( &a, &e ); - bli_printm( "e := e - a", &e, "%4.1f", "" ); - - // Scale a matrix (destructive). - bli_scalm( &alpha, &e ); - bli_printm( "e := alpha * e", &e, "%4.1f", "" ); - - // Scale a matrix (non-destructive). - bli_scal2m( &beta, &e, &c ); - bli_printm( "c := beta * e", &c, "%4.1f", "" ); - - // Scale and accumulate between matrices. - bli_axpym( &alpha, &a, &c ); - bli_printm( "c := c + alpha * a", &c, "%4.1f", "" ); - - - // - // Example 4: Copy and transpose a matrix. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create an n-by-m matrix into which we can copy-transpose an m-by-n - // matrix. - bli_obj_create( dt, n, m, rs, cs, &f ); - - // Initialize all of 'f' to -1.0 to simulate junk values. - bli_setm( &BLIS_MINUS_ONE, &f ); - - bli_printm( "e:", &e, "%4.1f", "" ); - bli_printm( "f (initial value):", &f, "%4.1f", "" ); - - // Since we are going to copy 'e' to 'f', we need to indicate a transpose - // on 'e', the input operand. Transposition can be indicated by setting a - // bit in the object. Since it always starts out as "no transpose", we can - // simply toggle the bit. - bli_obj_toggle_trans( &e ); - - // Another way to mark and object for transposition is to set it directly. - //bli_obj_set_onlytrans( BLIS_TRANSPOSE, &e ); - - // A third way is to "apply" a transposition. This is equivalent to toggling - // the transposition when the value being applied is BLIS_TRANSPOSE. If - // the value applied is BLIS_NO_TRANSPOSE, the transposition bit in the - // targeted object is unaffected. (Applying transposes is more useful in - // practice when the 'trans' argument is a variable and not a constant - // literal.) - //bli_obj_apply_trans( BLIS_TRANSPOSE, &e ); - //bli_obj_apply_trans( BLIS_NO_TRANSPOSE, &e ); - //bli_obj_apply_trans( trans, &e ); - - // Copy 'e' to 'f', transposing 'e' in the process. Notice that we haven't - // modified any properties of 'd'. It's the source operand that matters - // when marking an operand for transposition, not the destination. - bli_copym( &e, &f ); - - bli_printm( "f (copied value):", &f, "%4.1f", "" ); - - - // - // Example 5: Copy and Hermitian-transpose a matrix. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // Create an n-by-m complex matrix into which we can Hermitian-transpose - // (or, conjugate-transpose) another complex (m-by-n) matrix. - dt = BLIS_DCOMPLEX; - bli_obj_create( dt, m, n, rs, cs, &g ); - bli_obj_create( dt, n, m, rs, cs, &h ); - - // Randomize 'g', the input operand. - bli_randm( &g ); - - // Initialize all of 'h' to -1.0 to simulate junk values. - bli_setm( &BLIS_MINUS_ONE, &h ); - - bli_printm( "g:", &g, "%4.1f", "" ); - bli_printm( "h (initial value):", &h, "%4.1f", "" ); - - // Set both the transpose and conjugation bits. - bli_obj_toggle_trans( &g ); - bli_obj_toggle_conj( &g ); - - // Copy 'g' to 'h', conjugating and transposing 'g' in the process. - // Once again, notice that it's the source operand that we've marked for - // conjugation. - bli_copym( &g, &h ); - - bli_printm( "h (copied value):", &h, "%4.1f", "" ); - - - // Free the objects. - bli_obj_free( &alpha ); - bli_obj_free( &beta ); - bli_obj_free( &gamma ); - bli_obj_free( &a ); - bli_obj_free( &b ); - bli_obj_free( &c ); - bli_obj_free( &d ); - bli_obj_free( &e ); - bli_obj_free( &f ); - bli_obj_free( &g ); - bli_obj_free( &h ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- - -int main( int argc, char** argv ) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/07level1m_diag.cpp b/examples/hpx/oapi/07level1m_diag.cpp deleted file mode 100644 index 5fb00fa188..0000000000 --- a/examples/hpx/oapi/07level1m_diag.cpp +++ /dev/null @@ -1,338 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - num_t dt; - dim_t m, n; - inc_t rs, cs; - - - // - // This file demonstrates level-1m operations on structured matrices. - // - - - // - // Example 1: Initialize the upper triangle of a matrix to random values. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - obj_t a; - - // Create a matrix to work with. - dt = BLIS_DOUBLE; - m = 5; n = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &a ); - - // First, we mark the matrix structure as triangular. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ); - - // Next, we specify whether the lower part or the upper part is to be - // recognized as the "stored" region (which we call the uplo field). The - // strictly opposite part (in this case, the strictly lower region) will - // be *assumed* to be zero during computation. However, when printed out, - // the strictly lower part may contain junk values. - bli_obj_set_uplo( BLIS_UPPER, &a ); - - // Now set the upper triangle to random values. - bli_randm( &a ); - - bli_printm( "a: randomize upper part (lower part may contain garbage)", &a, "%4.1f", "" ); - - - // - // Example 2: Initialize the upper triangle of a matrix to random values - // but also explicitly set the strictly lower triangle to zero. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - obj_t b, bl; - - // Create a matrix to work with. - dt = BLIS_DOUBLE; - m = 5; n = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &b ); - - // Set structure and uplo. - bli_obj_set_struc( BLIS_TRIANGULAR, &b ); - bli_obj_set_uplo( BLIS_UPPER, &b ); - - // Create an alias, 'bl', of the original object 'b'. Both objects will - // refer to the same underlying matrix elements, but now we will have two - // different "views" into the matrix. Aliases are simply "shallow copies" - // of the objects, meaning no additional memory allocation takes place. - // Therefore it is up to the API user (you) to make sure that you only - // free the original object (or exactly one of the aliases). - bli_obj_alias_to( &b, &bl ); - - // Digression: Each object contains a diagonal offset (even vectors), - // even if it is never needed. The diagonal offset for a newly-created - // object (ie: objects created via bli_obj_create*()) defaults to 0, - // meaning it intersects element (0,0), but it can be changed. When the - // diagonal offset delta is positive, the diagonal intersects element - // (0,delta). When the diagonal offset is negative, the diagonal - // intersects element (-delta,0). In other words, think of element (0,0) - // as the origin of a coordinate plane, with the diagonal being the - // x-axis value. - - // Set the diagonal offset of 'bl' to -1. - bli_obj_set_diag_offset( -1, &bl ); - - // Set the uplo field of 'bl' to "lower". - bli_obj_set_uplo( BLIS_LOWER, &bl ); - - // Set the upper triangle of 'b' to random values. - bli_randm( &b ); - - // Set the strictly lower triangle of 'b' to zero (by setting the lower - // triangle of 'bl' to zero). - bli_setm( &BLIS_ZERO, &bl ); - - bli_printm( "b: randomize upper part; set strictly lower part to 0.0", &b, "%4.1f", "" ); - - // You may not see the effect of setting the strictly lower part to zero, - // since those values may already be zero (instead of random junk). So - // let's set it to something you'll notice, like -1.0. - bli_setm( &BLIS_MINUS_ONE, &bl ); - - bli_printm( "b: randomize upper part; set strictly lower part to -1.0", &b, "%4.1f", "" ); - - - // - // Example 3: Copy the lower triangle of an existing object to a newly - // created (but otherwise uninitialized) object. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - obj_t c; - - // Create a matrix to work with. - dt = BLIS_DOUBLE; - m = 5; n = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &c ); - - // Reset the diagonal offset of 'bl' to 0. - bli_obj_set_diag_offset( 0, &bl ); - - // Copy the lower triangle of matrix 'b' from Example 2 to object 'c'. - // This should give us -1.0 in the strictly lower part and some non-zero - // random values along the diagonal. Note that since 'c' is starting out - // uninitialized, the strictly upper part could contain junk. - bli_copym( &bl, &c ); - - bli_printm( "c: copy lower part of b (upper part may contain garbage)", &c, "%4.1f", "" ); - - // Notice that the structure and uplo properties of 'c' were set to their - // default values, BLIS_GENERAL and BLIS_DENSE, respectively. Thus, it is - // the structure and uplo of the *source* operand that controls what gets - // copied, regardless of the structure/uplo of the destination. To - // demonstrate this further, let's see what happens when we copy 'bl' - // (which is lower triangular) to 'a' (which is upper triangular). - - bli_copym( &bl, &a ); - - // The result is that the lower part (diagonal and strictly lower part) is - // copied into 'a', but the elements in the strictly upper part of 'a' are - // unaffected. Note, however, that 'a' is still marked as upper triangular - // and so in future computations where 'a' is an input operand, the -1.0 - // values that were copied from 'bl' into the lower triangle will be - // ignored. Generally speaking, level-1m operations on triangular matrices - // ignore the "unstored" regions of input operands because they are assumed - // to be zero). - - bli_printm( "a: copy lower triangular bl to upper triangular a", &a, "%4.1f", "" ); - - - // - // Example 4: Copy the lower triangle of an existing object into the - // upper triangle of an existing object. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - obj_t d; - - // Create a matrix to work with. - dt = BLIS_DOUBLE; - m = 5; n = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &d ); - - // Let's start by setting entire destination matrix to zero. - bli_setm( &BLIS_ZERO, &d ); - - bli_printm( "d: initial value (all zeros)", &d, "%4.1f", "" ); - - // Recall that 'bl' is marked as lower triangular with a diagonal offset - // of 0. Also recall that 'bl' is an alias of 'b', which is now fully - // initialized. But let's change a few values manually so we can later - // see the full effect of the transposition. - bli_setijm( 2.0, 0.0, 2, 0, &bl ); - bli_setijm( 3.0, 0.0, 3, 0, &bl ); - bli_setijm( 4.0, 0.0, 4, 0, &bl ); - bli_setijm( 3.1, 0.0, 3, 1, &bl ); - bli_setijm( 3.2, 0.0, 3, 2, &bl ); - - bli_printm( "bl: lower triangular bl is aliased to b", &bl, "%4.1f", "" ); - - // We want to pluck out the lower triangle and transpose it into the upper - // triangle of 'd'. - bli_obj_toggle_trans( &bl ); - - // Now we copy the transpose of the lower part of 'bl' into the upper - // part of 'd'. (Again, notice that we haven't modified any properties of - // 'd'. It's the source operand that matters, not the destination!) - bli_copym( &bl, &d ); - - bli_printm( "d: transpose of lower triangular of bl copied to d", &d, "%4.1f", "" ); - - - // - // Example 5: Create a rectangular matrix (m > n) with a lower trapezoid - // containing random values, then set the strictly upper - // triangle to zeros. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - obj_t e, el; - - // Create a matrix to work with. - dt = BLIS_DOUBLE; - m = 6; n = 4; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &e ); - - // Initialize the entire matrix to -1.0 to simulate junk values. - bli_setm( &BLIS_MINUS_ONE, &e ); - - bli_printm( "e: initial value (all -1.0)", &e, "%4.1f", "" ); - - // Create an alias to work with. - bli_obj_alias_to( &e, &el ); - - // Set structure and uplo of 'el'. - bli_obj_set_struc( BLIS_TRIANGULAR, &el ); - bli_obj_set_uplo( BLIS_LOWER, &el ); - - // Digression: Notice that "triangular" structure does not require that - // the matrix be square. Rather, it simply means that either the part above - // or below the diagonal will be assumed to be zero. - - // Randomize the lower trapezoid. - bli_randm( &el ); - - bli_printm( "e: after lower trapezoid randomized", &e, "%4.1f", "" ); - - // Move the diagonal offset of 'el' to 1 and flip the uplo field to - // "upper". - bli_obj_set_diag_offset( 1, &el ); - bli_obj_set_uplo( BLIS_UPPER, &el ); - - // Set the upper triangle to zero. - bli_setm( &BLIS_ZERO, &el ); - - bli_printm( "e: after upper triangle set to zero", &e, "%4.1f", "" ); - - - // - // Example 6: Create an upper Hessenberg matrix of random values and then - // set the "unstored" values to zero. - // - - printf( "\n#\n# -- Example 6 --\n#\n\n" ); - - obj_t h, hl; - - // Create a matrix to work with. - dt = BLIS_DOUBLE; - m = 5; n = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &h ); - - // Initialize the entire matrix to -1.0 to simulate junk values. - bli_setm( &BLIS_MINUS_ONE, &h ); - - bli_printm( "h: initial value (all -1.0)", &h, "%4.1f", "" ); - - // Set the diagonal offset of 'h' to -1. - bli_obj_set_diag_offset( -1, &h ); - - // Set the structure and uplo of 'h'. - bli_obj_set_struc( BLIS_TRIANGULAR, &h ); - bli_obj_set_uplo( BLIS_UPPER, &h ); - - // Randomize the elements on and above the first subdiagonal. - bli_randm( &h ); - - bli_printm( "h: after randomizing above first subdiagonal", &h, "%4.1f", "" ); - - // Create an alias to work with. - bli_obj_alias_to( &h, &hl ); - - // Flip the uplo of 'hl' and move the diagonal down by one. - bli_obj_set_uplo( BLIS_LOWER, &hl ); - bli_obj_set_diag_offset( -2, &hl ); - - // Set the region strictly below the first subdiagonal (on or below - // the second subdiagonal) to zero. - bli_setm( &BLIS_ZERO, &hl ); - - bli_printm( "h: after setting elements below first subdiagonal to zero", &h, "%4.1f", "" ); - - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &b ); - bli_obj_free( &c ); - bli_obj_free( &d ); - bli_obj_free( &e ); - bli_obj_free( &h ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- - -int main( int argc, char** argv ) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/08level2.cpp b/examples/hpx/oapi/08level2.cpp deleted file mode 100644 index b2d19d3fb0..0000000000 --- a/examples/hpx/oapi/08level2.cpp +++ /dev/null @@ -1,334 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - num_t dt; - dim_t m, n; - inc_t rs, cs; - - obj_t a, x, y, b; - obj_t* alpha; - obj_t* beta; - - // - // This file demonstrates level-2 operations. - // - - - // - // Example 1: Perform a general rank-1 update (ger) operation. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 4; n = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &a ); - bli_obj_create( dt, m, 1, rs, cs, &x ); - bli_obj_create( dt, 1, n, rs, cs, &y ); - - // Set alpha. - alpha = (obj_t*)(&BLIS_ONE); - - // Initialize vectors 'x' and 'y'. - bli_randv( &x ); - bli_setv( &BLIS_MINUS_ONE, &y ); - - // Initialize 'a' to 1.0. - bli_setm( &BLIS_ONE, &a ); - - bli_printm( "x: set to random values", &x, "%4.1f", "" ); - bli_printm( "y: set to -1.0", &y, "%4.1f", "" ); - bli_printm( "a: initial value", &a, "%4.1f", "" ); - - // a := a + alpha * x * y, where 'a' is general. - bli_ger( alpha, &x, &y, &a ); - - bli_printm( "a: after ger", &a, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &x ); - bli_obj_free( &y ); - - - // - // Example 2: Perform a general matrix-vector multiply (gemv) operation. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 4; n = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &a ); - bli_obj_create( dt, 1, n, rs, cs, &x ); - bli_obj_create( dt, 1, m, rs, cs, &y ); - - // Notice that we created vectors 'x' and 'y' as row vectors, even though - // we often think of them as column vectors so that the overall problem - // dimensions remain conformal. Note that this flexibility only comes - // from the fact that the operation requires those operands to be vectors. - // If we were instead looking at an operation where the operands were of - // general shape (such as with the gemm operation), then typically the - // dimensions matter, and column vectors would not be interchangeable with - // row vectors and vice versa. - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - beta = (obj_t*)(&BLIS_ONE); - - // Initialize vectors 'x' and 'y'. - bli_setv( &BLIS_ONE, &x ); - bli_setv( &BLIS_ZERO, &y ); - - // Randomize 'a'. - bli_randm( &a ); - - bli_printm( "a: randomized", &a, "%4.1f", "" ); - bli_printm( "x: set to 1.0", &x, "%4.1f", "" ); - bli_printm( "y: initial value", &y, "%4.1f", "" ); - - // y := beta * y + alpha * a * x, where 'a' is general. - bli_gemv( alpha, &a, &x, beta, &y ); - - bli_printm( "y: after gemv", &y, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &x ); - bli_obj_free( &y ); - - - // - // Example 3: Perform a symmetric rank-1 update (syr) operation. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, m, rs, cs, &a ); - bli_obj_create( dt, 1, m, rs, cs, &x ); - - // Set alpha. - alpha = (obj_t*)(&BLIS_ONE); - - // Initialize vector 'x'. - bli_randv( &x ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_setm( &BLIS_ZERO, &a ); - - // Mark matrix 'a' as symmetric and stored in the lower triangle, and - // then randomize that lower triangle. - bli_obj_set_struc( BLIS_SYMMETRIC, &a ); - bli_obj_set_uplo( BLIS_LOWER, &a ); - bli_randm( &a ); - - bli_printm( "x: set to random values", &x, "%4.1f", "" ); - bli_printm( "a: initial value (zeros in upper triangle)", &a, "%4.1f", "" ); - - // a := a + alpha * x * x^T, where 'a' is symmetric and lower-stored. - bli_syr( alpha, &x, &a ); - - bli_printm( "a: after syr", &a, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &x ); - - - // - // Example 4: Perform a symmetric matrix-vector multiply (symv) operation. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, m, rs, cs, &a ); - bli_obj_create( dt, 1, m, rs, cs, &x ); - bli_obj_create( dt, 1, m, rs, cs, &y ); - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - beta = (obj_t*)(&BLIS_ONE); - - // Initialize vectors 'x' and 'y'. - bli_setv( &BLIS_ONE, &x ); - bli_setv( &BLIS_ZERO, &y ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_setm( &BLIS_ZERO, &a ); - - // Mark matrix 'a' as symmetric and stored in the upper triangle, and - // then randomize that upper triangle. - bli_obj_set_struc( BLIS_SYMMETRIC, &a ); - bli_obj_set_uplo( BLIS_UPPER, &a ); - bli_randm( &a ); - - bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" ); - bli_printm( "x: set to 1.0", &x, "%4.1f", "" ); - bli_printm( "y: initial value", &y, "%4.1f", "" ); - - // y := beta * y + alpha * a * x, where 'a' is symmetric and upper-stored. - bli_symv( alpha, &a, &x, beta, &y ); - - bli_printm( "y: after symv", &y, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &x ); - bli_obj_free( &y ); - - - // - // Example 5: Perform a triangular matrix-vector multiply (trmv) operation. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, m, rs, cs, &a ); - bli_obj_create( dt, 1, m, rs, cs, &x ); - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - - // Initialize vector 'x'. - bli_setv( &BLIS_ONE, &x ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_setm( &BLIS_ZERO, &a ); - - // Mark matrix 'a' as triangular, stored in the lower triangle, and - // having a non-unit diagonal. Then randomize that lower triangle. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ); - bli_obj_set_uplo( BLIS_LOWER, &a ); - bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); - bli_randm( &a ); - - bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); - bli_printm( "x: initial value", &x, "%4.1f", "" ); - - // x := alpha * a * x, where 'a' is triangular and lower-stored. - bli_trmv( alpha, &a, &x ); - - bli_printm( "x: after trmv", &x, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &x ); - - - // - // Example 6: Perform a triangular solve (trsv) operation. - // - - printf( "\n#\n# -- Example 6 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 5; rs = 0; cs = 0; - bli_obj_create( dt, m, m, rs, cs, &a ); - bli_obj_create( dt, 1, m, rs, cs, &b ); - bli_obj_create( dt, 1, m, rs, cs, &y ); - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - - // Initialize vector 'x'. - bli_setv( &BLIS_ONE, &b ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_setm( &BLIS_ZERO, &a ); - - // Mark matrix 'a' as triangular, stored in the lower triangle, and - // having a non-unit diagonal. Then randomize that lower triangle. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ); - bli_obj_set_uplo( BLIS_LOWER, &a ); - bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); - bli_randm( &a ); - - // Load the diagonal. By setting the diagonal to something of greater - // absolute value than the off-diagonal elements, we increase the odds - // that the matrix is not singular (singular matrices have no inverse). - bli_shiftd( &BLIS_TWO, &a ); - - bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); - bli_printm( "b: initial value", &b, "%4.1f", "" ); - - // solve a * x = alpha * b, where 'a' is triangular and lower-stored, and - // overwrite b with the solution vector x. - bli_trsv( alpha, &a, &b ); - - bli_printm( "b: after trsv", &b, "%4.1f", "" ); - - // We can confirm the solution by comparing the product of a and x to the - // original value of b. - bli_copyv( &b, &y ); - bli_trmv( alpha, &a, &y ); - - bli_printm( "y: should equal initial value of b", &y, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &b ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- -// -int main( int argc, char** argv ) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/09level3.cpp b/examples/hpx/oapi/09level3.cpp deleted file mode 100644 index 02b86366fa..0000000000 --- a/examples/hpx/oapi/09level3.cpp +++ /dev/null @@ -1,339 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - num_t dt; - dim_t m, n, k; - inc_t rs, cs; - side_t side; - - obj_t a, b, c; - obj_t* alpha; - obj_t* beta; - - - // - // This file demonstrates level-3 operations. - // - - - // - // Example 1: Perform a general matrix-matrix multiply (gemm) operation. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create some matrix operands to work with. - dt = BLIS_DOUBLE; - m = 4; n = 5; k = 3; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &c ); - bli_obj_create( dt, m, k, rs, cs, &a ); - bli_obj_create( dt, k, n, rs, cs, &b ); - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - beta = (obj_t*)(&BLIS_ONE); - - // Initialize the matrix operands. - bli_randm( &a ); - bli_setm( &BLIS_ONE, &b ); - bli_setm( &BLIS_ZERO, &c ); - - bli_printm( "a: randomized", &a, "%4.1f", "" ); - bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); - bli_printm( "c: initial value", &c, "%4.1f", "" ); - - // c := beta * c + alpha * a * b, where 'a', 'b', and 'c' are general. - bli_gemm( alpha, &a, &b, beta, &c ); - - bli_printm( "c: after gemm", &c, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &b ); - bli_obj_free( &c ); - - - // - // Example 1b: Perform a general matrix-matrix multiply (gemm) operation - // with the left input operand (matrix A) transposed. - // - - printf( "\n#\n# -- Example 1b --\n#\n\n" ); - - // Create some matrix operands to work with. - dt = BLIS_DOUBLE; - m = 4; n = 5; k = 3; rs = 0; cs = 0; - bli_obj_create( dt, m, n, rs, cs, &c ); - bli_obj_create( dt, k, m, rs, cs, &a ); - bli_obj_create( dt, k, n, rs, cs, &b ); - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - beta = (obj_t*)(&BLIS_ONE); - - // Initialize the matrix operands. - bli_randm( &a ); - bli_setm( &BLIS_ONE, &b ); - bli_setm( &BLIS_ZERO, &c ); - - // Set the transpose bit in 'a'. - bli_obj_toggle_trans( &a ); - - bli_printm( "a: randomized", &a, "%4.1f", "" ); - bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); - bli_printm( "c: initial value", &c, "%4.1f", "" ); - - // c := beta * c + alpha * a^T * b, where 'a', 'b', and 'c' are general. - bli_gemm( alpha, &a, &b, beta, &c ); - - bli_printm( "c: after gemm", &c, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &b ); - bli_obj_free( &c ); - - - // - // Example 2: Perform a symmetric rank-k update (syrk) operation. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 5; k = 3; rs = 0; cs = 0; - bli_obj_create( dt, m, m, rs, cs, &c ); - bli_obj_create( dt, m, k, rs, cs, &a ); - - // Set alpha. - alpha = (obj_t*)(&BLIS_ONE); - - // Initialize matrix operands. - bli_setm( &BLIS_ZERO, &c ); - bli_randm( &a ); - - // Mark matrix 'c' as symmetric and stored in the lower triangle, and - // then randomize that lower triangle. - bli_obj_set_struc( BLIS_SYMMETRIC, &c ); - bli_obj_set_uplo( BLIS_LOWER, &c ); - bli_randm( &c ); - - bli_printm( "a: set to random values", &a, "%4.1f", "" ); - bli_printm( "c: initial value (zeros in upper triangle)", &c, "%4.1f", "" ); - - // c := c + alpha * a * a^T, where 'c' is symmetric and lower-stored. - bli_syrk( alpha, &a, beta, &c ); - - bli_printm( "c: after syrk", &c, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &c ); - bli_obj_free( &a ); - - - // - // Example 3: Perform a symmetric matrix-matrix multiply (symm) operation. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 5; n = 6; rs = 0; cs = 0; - bli_obj_create( dt, m, m, rs, cs, &a ); - bli_obj_create( dt, m, n, rs, cs, &b ); - bli_obj_create( dt, m, n, rs, cs, &c ); - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - beta = (obj_t*)(&BLIS_ONE); - - // Set the side operand. - side = BLIS_LEFT; - - // Initialize matrices 'b' and 'c'. - bli_setm( &BLIS_ONE, &b ); - bli_setm( &BLIS_ZERO, &c ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_setm( &BLIS_ZERO, &a ); - - // Mark matrix 'a' as symmetric and stored in the upper triangle, and - // then randomize that upper triangle. - bli_obj_set_struc( BLIS_SYMMETRIC, &a ); - bli_obj_set_uplo( BLIS_UPPER, &a ); - bli_randm( &a ); - - bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" ); - bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); - bli_printm( "c: initial value", &c, "%4.1f", "" ); - - // c := beta * c + alpha * a * b, where 'a' is symmetric and upper-stored. - // Note that the first 'side' operand indicates the side from which matrix - // 'a' is multiplied into 'b'. - bli_symm( side, alpha, &a, &b, beta, &c ); - - bli_printm( "c: after symm", &c, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &b ); - bli_obj_free( &c ); - - - // - // Example 4: Perform a triangular matrix-matrix multiply (trmm) operation. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 5; n = 4; rs = 0; cs = 0; - bli_obj_create( dt, m, m, rs, cs, &a ); - bli_obj_create( dt, m, n, rs, cs, &b ); - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - - // Set the side operand. - side = BLIS_LEFT; - - // Initialize matrix 'b'. - bli_setm( &BLIS_ONE, &b ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_setm( &BLIS_ZERO, &a ); - - // Mark matrix 'a' as triangular, stored in the lower triangle, and - // having a non-unit diagonal. Then randomize that lower triangle. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ); - bli_obj_set_uplo( BLIS_LOWER, &a ); - bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); - bli_randm( &a ); - - bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); - bli_printm( "b: initial value", &b, "%4.1f", "" ); - - // b := alpha * a * b, where 'a' is triangular and lower-stored. - bli_trmm( side, alpha, &a, &b ); - - bli_printm( "x: after trmm", &b, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &b ); - - - // - // Example 5: Perform a triangular solve with multiple right-hand sides - // (trsm) operation. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - dt = BLIS_DOUBLE; - m = 5; n = 4; rs = 0; cs = 0; - bli_obj_create( dt, m, m, rs, cs, &a ); - bli_obj_create( dt, m, n, rs, cs, &b ); - bli_obj_create( dt, m, n, rs, cs, &c ); - - // Set the scalars to use. - alpha = (obj_t*)(&BLIS_ONE); - - // Set the side operand. - side = BLIS_LEFT; - - // Initialize matrix 'b'. - bli_setm( &BLIS_ONE, &b ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_setm( &BLIS_ZERO, &a ); - - // Mark matrix 'a' as triangular, stored in the lower triangle, and - // having a non-unit diagonal. Then randomize that lower triangle. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ); - bli_obj_set_uplo( BLIS_LOWER, &a ); - bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); - bli_randm( &a ); - - // Load the diagonal. By setting the diagonal to something of greater - // absolute value than the off-diagonal elements, we increase the odds - // that the matrix is not singular (singular matrices have no inverse). - bli_shiftd( &BLIS_TWO, &a ); - - bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); - bli_printm( "b: initial value", &b, "%4.1f", "" ); - - // solve a * x = alpha * b, where 'a' is triangular and lower-stored, and - // overwrite b with the solution matrix x. - bli_trsm( side, alpha, &a, &b ); - - bli_printm( "b: after trsm", &b, "%4.1f", "" ); - - // We can confirm the solution by comparing the product of a and x to the - // original value of b. - bli_copym( &b, &c ); - bli_trmm( side, alpha, &a, &c ); - - bli_printm( "c: should equal initial value of b", &c, "%4.1f", "" ); - - // Free the objects. - bli_obj_free( &a ); - bli_obj_free( &b ); - bli_obj_free( &c ); - - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- - -int main( int argc, char** argv ) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/oapi/Makefile b/examples/hpx/oapi/Makefile deleted file mode 100644 index 2b8bf0d7d4..0000000000 --- a/examples/hpx/oapi/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -# BLIS -# An object-based framework for developing high-performance BLAS-like -# libraries. -# -# Copyright (C) 2014, The University of Texas at Austin -# Copyright (C) 2022 Tactical Computing Laboratories, LLC -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# - Neither the name of The University of Texas nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# --include $(SHARE_PATH)/common.mk - -all: - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=00obj_basic -o 00obj_basic 00obj_basic.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=01obj_attach -o 01obj_attach 01obj_attach.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=02obj_ij -o 02obj_ij 02obj_ij.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=03obj_view -o 03obj_view 03obj_view.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=04level0 -o 04level0 04level0.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=05level1v -o 05level1v 05level1v.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=06level1m -o 06level1m 06level1m.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=07level1m_diag -o 07level1m_diag 07level1m_diag.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=08level2 -o 08level2 08level2.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=09level3 -o09level3 09level3.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - -clean: - rm 00obj_basic 02obj_ij 04level0 06level1m 08level2 01obj_attach 03obj_view 05level1v 07level1m_diag 09level3 - diff --git a/examples/hpx/tapi/00level1v.cpp b/examples/hpx/tapi/00level1v.cpp deleted file mode 100644 index 2afed6d88d..0000000000 --- a/examples/hpx/tapi/00level1v.cpp +++ /dev/null @@ -1,192 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - double* x; - double* y; - double* z; - double* w; - double* a; - double alpha, beta, gamma; - dim_t m, n; - inc_t rs, cs; - - // Initialize some basic constants. - double zero = 0.0; - double one = 1.0; - double minus_one = -1.0; - - - // - // This file demonstrates working with vectors and the level-1v - // operations. - // - - - // - // Example 1: Create vectors and then broadcast (copy) scalar - // values to all elements. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create a few vectors to work with. We make them all of the same length - // so that we can perform operations between them. - // NOTE: We've chosen to use row vectors here (1x4) instead of column - // vectors (4x1) to allow for easier reading of standard output (less - // scrolling). - m = 1; n = 4; rs = n; cs = 1; - x = static_cast(malloc( m * n * sizeof( double ) )); - y = static_cast(malloc( m * n * sizeof( double ) )); - z = static_cast(malloc( m * n * sizeof( double ) )); - w = static_cast(malloc( m * n * sizeof( double ) )); - a = static_cast(malloc( m * n * sizeof( double ) )); - - // Let's initialize some scalars. - alpha = 2.0; - beta = 0.2; - gamma = 3.0; - - printf( "alpha:\n%4.1f\n\n", alpha ); - printf( "beta:\n%4.1f\n\n", beta ); - printf( "gamma:\n%4.1f\n\n", gamma ); - printf( "\n" ); - - bli_dsetv( BLIS_NO_CONJUGATE, n, &one, x, 1 ); - bli_dsetv( BLIS_NO_CONJUGATE, n, &alpha, y, 1 ); - bli_dsetv( BLIS_NO_CONJUGATE, n, &zero, z, 1 ); - - // Note that we can use printv or printm to print vectors since vectors - // are also matrices. We choose to use printm because it honors the - // orientation of the vector (row or column) when printing, whereas - // printv always prints vectors as column vectors regardless of their - // they are 1 x n or n x 1. - bli_dprintm( "x := 1.0", m, n, x, rs, cs, "%4.1f", "" ); - bli_dprintm( "y := alpha", m, n, y, rs, cs, "%4.1f", "" ); - bli_dprintm( "z := 0.0", m, n, z, rs, cs, "%4.1f", "" ); - - - // - // Example 2: Randomize a vector. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Set a vector to random values. - bli_drandv( n, w, 1 ); - - bli_dprintm( "x := randv()", m, n, w, rs, cs, "%4.1f", "" ); - - - // - // Example 3: Perform various element-wise operations on vectors. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Copy a vector. - bli_dcopyv( BLIS_NO_CONJUGATE, n, w, 1, a, 1 ); - bli_dprintm( "a := w", m, n, a, rs, cs, "%4.1f", "" ); - - // Add and subtract vectors. - bli_daddv( BLIS_NO_CONJUGATE, n, y, 1, a, 1 ); - bli_dprintm( "a := a + y", m, n, a, rs, cs, "%4.1f", "" ); - - bli_dsubv( BLIS_NO_CONJUGATE, n, w, 1, a, 1 ); - bli_dprintm( "a := a + w", m, n, a, rs, cs, "%4.1f", "" ); - - // Scale a vector (destructive). - bli_dscalv( BLIS_NO_CONJUGATE, n, &beta, a, 1 ); - bli_dprintm( "a := beta * a", m, n, a, rs, cs, "%4.1f", "" ); - - // Scale a vector (non-destructive). - bli_dscal2v( BLIS_NO_CONJUGATE, n, &gamma, a, 1, z, 1 ); - bli_dprintm( "z := gamma * a", m, n, z, rs, cs, "%4.1f", "" ); - - // Scale and accumulate between vectors. - bli_daxpyv( BLIS_NO_CONJUGATE, n, &alpha, w, 1, x, 1 ); - bli_dprintm( "x := x + alpha * w", m, n, x, rs, cs, "%4.1f", "" ); - - bli_dxpbyv( BLIS_NO_CONJUGATE, n, w, 1, &minus_one, x, 1 ); - bli_dprintm( "x := -1.0 * x + w", m, n, x, rs, cs, "%4.1f", "" ); - - // Invert a vector element-wise. - bli_dinvertv( n, y, 1 ); - bli_dprintm( "y := 1 / y", m, n, y, rs, cs, "%4.1f", "" ); - - // Swap two vectors. - bli_dswapv( n, x, 1, y, 1 ); - bli_dprintm( "x (after swapping with y)", m, n, x, rs, cs, "%4.1f", "" ); - bli_dprintm( "y (after swapping with x)", m, n, y, rs, cs, "%4.1f", "" ); - - - // - // Example 4: Perform contraction-like operations on vectors. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Perform a dot product. - bli_ddotv( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, n, a, 1, z, 1, &gamma ); - printf( "gamma := a * z (dot product):\n%5.2f\n\n", gamma ); - - // Perform an extended dot product. - bli_ddotxv( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, n, &alpha, a, 1, z, 1, &one, &gamma ); - printf( "gamma := 1.0 * gamma + alpha * a * z (accumulate scaled dot product):\n%5.2f\n\n", gamma ); - - - // Free the memory obtained via malloc(). - free( x ); - free( y ); - free( z ); - free( w ); - free( a ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- - -int main(int argc, char** argv) { - return hpx::init(argc, argv); -} - diff --git a/examples/hpx/tapi/01level1m.cpp b/examples/hpx/tapi/01level1m.cpp deleted file mode 100644 index 5c8ca8bb8f..0000000000 --- a/examples/hpx/tapi/01level1m.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - double* a; - double* b; - double* c; - double* d; - double* e; - double* f; - dcomplex* g; - dcomplex* h; - double alpha, beta, gamma; - dim_t m, n; - inc_t rs, cs; - - // Initialize some basic constants. - double zero = 0.0; - double one = 1.0; - double minus_one = -1.0; - dcomplex minus_one_z = {-1.0, 0.0}; - - - // - // This file demonstrates working with matrices and the level-1m - // operations. - // - - - // - // Example 1: Create matrices and then broadcast (copy) scalar - // values to all elements. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create a few matrices to work with. We make them all of the same - // dimensions so that we can perform operations between them. - m = 2; n = 3; rs = 1; cs = m; - a = static_cast(malloc( m * n * sizeof( double ) )); - b = static_cast(malloc( m * n * sizeof( double ) )); - c = static_cast(malloc( m * n * sizeof( double ) )); - d = static_cast(malloc( m * n * sizeof( double ) )); - e = static_cast(malloc( m * n * sizeof( double ) )); - - // Let's initialize some scalars. - alpha = 2.0; - beta = 0.2; - gamma = 3.0; - - printf( "alpha:\n%4.1f\n\n", alpha ); - printf( "beta:\n%4.1f\n\n", beta ); - printf( "gamma:\n%4.1f\n\n", gamma ); - printf( "\n" ); - - // Matrices, like vectors, can set by "broadcasting" a constant to every - // element. Note that the second argument (0) is the diagonal offset. - // The diagonal offset is only used when the uplo value is something other - // than BLIS_DENSE (e.g. BLIS_LOWER or BLIS_UPPER). - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &one, a, rs, cs ); - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &alpha, b, rs, cs ); - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &zero, c, rs, cs ); - - bli_dprintm( "a := 1.0", m, n, a, rs, cs, "%4.1f", "" ); - bli_dprintm( "b := alpha", m, n, b, rs, cs, "%4.1f", "" ); - bli_dprintm( "c := 0.0", m, n, c, rs, cs, "%4.1f", "" ); - - - // - // Example 2: Randomize a matrix object. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - bli_drandm( 0, BLIS_DENSE, m, n, e, rs, cs ); - - bli_dprintm( "e (randomized):", m, n, e, rs, cs, "%4.1f", "" ); - - - // - // Example 3: Perform element-wise operations on matrices. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Copy a matrix. - bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, - m, n, e, rs, cs, d, rs, cs ); - bli_dprintm( "d := e", m, n, d, rs, cs, "%4.1f", "" ); - - // Add and subtract vectors. - bli_daddm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, - m, n, a, rs, cs, d, rs, cs ); - bli_dprintm( "d := d + a", m, n, d, rs, cs, "%4.1f", "" ); - - bli_dsubm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, - m, n, a, rs, cs, e, rs, cs ); - bli_dprintm( "e := e - a", m, n, e, rs, cs, "%4.1f", "" ); - - // Scale a matrix (destructive). - bli_dscalm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &alpha, e, rs, cs ); - bli_dprintm( "e := alpha * e", m, n, e, rs, cs, "%4.1f", "" ); - - // Scale a matrix (non-destructive). - bli_dscal2m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, - m, n, &beta, e, rs, cs, c, rs, cs ); - bli_dprintm( "c := beta * e", m, n, c, rs, cs, "%4.1f", "" ); - - // Scale and accumulate between matrices. - bli_daxpym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, - m, n, &alpha, a, rs, cs, c, rs, cs ); - bli_dprintm( "c := alpha * a", m, n, c, rs, cs, "%4.1f", "" ); - - - // - // Example 4: Copy and transpose a matrix. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create an n-by-m matrix into which we can copy-transpose an m-by-n - // matrix. - f = static_cast(malloc( n * m * sizeof( double ) )); - dim_t rsf = 1, csf = n; - - // Initialize all of 'f' to -1.0 to simulate junk values. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - n, m, &minus_one, f, rsf, csf ); - - bli_dprintm( "e:", m, n, e, rs, cs, "%4.1f", "" ); - bli_dprintm( "f (initial value):", n, m, f, rsf, csf, "%4.1f", "" ); - - - // Copy 'e' to 'f', transposing 'e' in the process. Notice that we haven't - // modified any properties of 'd'. It's the source operand that matters - // when marking an operand for transposition, not the destination. - bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_TRANSPOSE, - n, m, e, rs, cs, f, rsf, csf ); - - bli_dprintm( "f (copied value):", n, m, f, rsf, csf, "%4.1f", "" ); - - - // - // Example 5: Copy and Hermitian-transpose a matrix. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - g = static_cast(malloc( m * n * sizeof(dcomplex) )); - h = static_cast(malloc( n * m * sizeof(dcomplex) )); - - bli_zrandm( 0, BLIS_DENSE, m, n, g, rs, cs ); - - bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - n, m, &minus_one_z, h, rsf, csf ); - - bli_zprintm( "g:", m, n, g, rs, cs, "%4.1f", "" ); - bli_zprintm( "h (initial value):", n, m, h, rsf, csf, "%4.1f", "" ); - - bli_zcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_CONJ_TRANSPOSE, - n, m, g, rs, cs, h, rsf, csf ); - - bli_zprintm( "h (copied value):", n, m, h, rsf, csf, "%4.1f", "" ); - - - // Free the memory obtained via malloc(). - free( a ); - free( b ); - free( c ); - free( d ); - free( e ); - free( f ); - free( g ); - free( h ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- -int main(int argc, char ** argv) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/tapi/02level1m_diag.cpp b/examples/hpx/tapi/02level1m_diag.cpp deleted file mode 100644 index 5ab11eec62..0000000000 --- a/examples/hpx/tapi/02level1m_diag.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - double* a; - double* b; - double* c; - double* d; - double* e; - double* h; - dim_t m, n; - inc_t rs, cs; - - // Initialize some basic constants. - double zero = 0.0; - double minus_one = -1.0; - - - // - // This file demonstrates level-1m operations on structured matrices. - // - - - // - // Example 1: Initialize the upper triangle of a matrix to random values. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create a matrix to work with. - m = 5; n = 5; rs = 1; cs = m; - a = static_cast(malloc( m * n * sizeof( double ) )); - - // Set the upper triangle to random values. - bli_drandm( 0, BLIS_UPPER, m, n, a, rs, cs ); - - bli_dprintm( "a: randomize upper part (lower part may contain garbage)", - m, n, a, rs, cs, "%4.1f", "" ); - - - // - // Example 2: Initialize the upper triangle of a matrix to random values - // but also explicitly set the strictly lower triangle to zero. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Create a matrix to work with. - m = 5; n = 5; rs = 1; cs = m; - b = static_cast(malloc( m * n * sizeof( double ) )); - - // Set the upper triangle to random values. - bli_drandm( 0, BLIS_UPPER, m, n, b, rs, cs ); - - // Set the strictly lower triangle of 'b' to zero (by setting the lower - // triangle of 'bl' to zero). - bli_dsetm( BLIS_NO_CONJUGATE, -1, BLIS_NONUNIT_DIAG, BLIS_LOWER, - m, n, &zero, b, rs, cs ); - - bli_dprintm( "b: randomize upper part; set strictly lower part to 0.0)", - m, n, b, rs, cs, "%4.1f", "" ); - - // You may not see the effect of setting the strictly lower part to zero, - // since those values may already be zero (instead of random junk). So - // let's set it to something you'll notice, like -1.0. - bli_dsetm( BLIS_NO_CONJUGATE, -1, BLIS_NONUNIT_DIAG, BLIS_LOWER, - m, n, &minus_one, b, rs, cs ); - - bli_dprintm( "b: randomize upper part; set strictly lower part to -1.0)", - m, n, b, rs, cs, "%4.1f", "" ); - - - // - // Example 3: Copy the lower triangle of an existing matrix to a newly - // created (but otherwise uninitialized) matrix. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Create a matrix to work with. - m = 5; n = 5; rs = 1; cs = m; - c = static_cast(malloc( m * n * sizeof( double ) )); - - bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_NO_TRANSPOSE, - m, n, b, rs, cs, c, rs, cs ); - - bli_dprintm( "c: copy lower part of b (upper part may contain garbage)", - m, n, c, rs, cs, "%4.1f", "" ); - - bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_NO_TRANSPOSE, - m, n, b, rs, cs, a, rs, cs ); - - bli_dprintm( "a: copy lower triangle of b to upper triangular a", - m, n, a, rs, cs, "%4.1f", "" ); - - - // - // Example 4: Copy the lower triangle of an existing object into the - // upper triangle of an existing object. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create a matrix to work with. - m = 5; n = 5; rs = 1; cs = m; - d = static_cast(malloc( m * n * sizeof( double ) )); - - // Let's start by setting entire destination matrix to zero. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &zero, d, rs, cs ); - - bli_dprintm( "d: initial value (all zeros)", - m, n, d, rs, cs, "%4.1f", "" ); - - // Let's change a few values of b manually so we can later see the full - // effect of the transposition. - bli_dsetijm( 2.0, 0.0, 2, 0, b, rs, cs ); - bli_dsetijm( 3.0, 0.0, 3, 0, b, rs, cs ); - bli_dsetijm( 4.0, 0.0, 4, 0, b, rs, cs ); - bli_dsetijm( 3.1, 0.0, 2, 1, b, rs, cs ); - bli_dsetijm( 3.2, 0.0, 3, 2, b, rs, cs ); - - bli_dprintm( "b:", - m, n, b, rs, cs, "%4.1f", "" ); - - bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_TRANSPOSE, - m, n, b, rs, cs, d, rs, cs ); - - bli_dprintm( "d: transpose of lower triangle of b copied to d", - m, n, d, rs, cs, "%4.1f", "" ); - - - // - // Example 5: Create a rectangular matrix (m > n) with a lower trapezoid - // containing random values, then set the strictly upper - // triangle to zeros. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // Create a matrix to work with. - m = 6; n = 4; rs = 1; cs = m; - e = static_cast(malloc( m * n * sizeof( double ) )); - - // Initialize the entire matrix to -1.0 to simulate junk values. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &minus_one, e, rs, cs ); - - bli_dprintm( "e: initial value (all -1.0)", - m, n, e, rs, cs, "%4.1f", "" ); - - // Randomize the lower trapezoid. - bli_drandm( 0, BLIS_LOWER, m, n, e, rs, cs ); - - bli_dprintm( "e: after lower trapezoid randomized", - m, n, e, rs, cs, "%4.1f", "" ); - - // Set the upper triangle to zero. - bli_dsetm( BLIS_NO_CONJUGATE, 1, BLIS_NONUNIT_DIAG, BLIS_UPPER, - m, n, &zero, e, rs, cs ); - - bli_dprintm( "e: after upper triangle set to zero", - m, n, e, rs, cs, "%4.1f", "" ); - - - // - // Example 6: Create an upper Hessenberg matrix of random values and then - // set the "unstored" values to zero. - // - - printf( "\n#\n# -- Example 6 --\n#\n\n" ); - - // Create a matrix to work with. - m = 5; n = 5; rs = 1; cs = m; - h = static_cast(malloc( m * n * sizeof( double ) )); - - // Initialize the entire matrix to -1.0 to simulate junk values. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &minus_one, h, rs, cs ); - - bli_dprintm( "h: initial value (all -1.0)", - m, n, h, rs, cs, "%4.1f", "" ); - - // Randomize the elements on and above the first subdiagonal. - bli_drandm( -1, BLIS_UPPER, m, n, h, rs, cs ); - - bli_dprintm( "h: after randomizing above first subdiagonal", - m, n, h, rs, cs, "%4.1f", "" ); - - // Set the region strictly below the first subdiagonal (on or below - // the second subdiagonal) to zero. - bli_dsetm( BLIS_NO_CONJUGATE, -2, BLIS_NONUNIT_DIAG, BLIS_LOWER, - m, n, &zero, h, rs, cs ); - - bli_dprintm( "h: after setting elements below first subdiagonal to zero", - m, n, h, rs, cs, "%4.1f", "" ); - - - // Free the memory obtained via malloc(). - free( a ); - free( b ); - free( c ); - free( d ); - free( e ); - free( h ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- -int main(int argc, char ** argv) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/tapi/03level2.cpp b/examples/hpx/tapi/03level2.cpp deleted file mode 100644 index 991dbb3e2e..0000000000 --- a/examples/hpx/tapi/03level2.cpp +++ /dev/null @@ -1,322 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - double* a; - double* x; - double* y; - double* b; - double alpha, beta; - dim_t m, n; - inc_t rs, cs; - - // Initialize some basic constants. - double zero = 0.0; - double one = 1.0; - double two = 2.0; - double minus_one = -1.0; - - - // - // This file demonstrates level-2 operations. - // - - - // - // Example 1: Perform a general rank-1 update (ger) operation. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 4; n = 5; rs = 1; cs = m; - a = static_cast(malloc( m * n * sizeof( double ) )); - x = static_cast(malloc( m * 1 * sizeof( double ) )); - y = static_cast(malloc( 1 * n * sizeof( double ) )); - - // Let's initialize some scalars. - alpha = 1.0; - - // Initialize vectors 'x' and 'y'. - bli_drandv( m, x, 1 ); - bli_dsetv( BLIS_NO_CONJUGATE, n, &minus_one, y, 1 ); - - // Initialize 'a' to 1.0. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &one, a, rs, cs ); - - bli_dprintm( "x: set to random values", m, 1, x, 1, m, "%4.1f", "" ); - bli_dprintm( "y: set to -1.0", 1, n, y, n, 1, "%4.1f", "" ); - bli_dprintm( "a: intial value", m, n, a, rs, cs, "%4.1f", "" ); - - // a := a + alpha * x * y, where 'a' is general. - bli_dger( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, - m, n, &alpha, x, 1, y, 1, a, rs, cs ); - - bli_dprintm( "a: after ger", m, n, a, rs, cs, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( x ); - free( y ); - - - // - // Example 2: Perform a general matrix-vector multiply (gemv) operation. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 4; n = 5; rs = 1; cs = m; - a = static_cast(malloc( m * n * sizeof( double ) )); - x = static_cast(malloc( 1 * n * sizeof( double ) )); - y = static_cast(malloc( 1 * m * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - beta = 1.0; - - // Initialize vectors 'x' and 'y'. - bli_dsetv( BLIS_NO_CONJUGATE, n, &one, x, 1 ); - bli_dsetv( BLIS_NO_CONJUGATE, m, &zero, y, 1 ); - - // Randomize 'a'. - bli_drandm( 0, BLIS_DENSE, m, n, a, rs, cs ); - - bli_dprintm( "a: randomized", m, n, a, rs, cs, "%4.1f", "" ); - bli_dprintm( "x: set to 1.0", 1, n, x, n, 1, "%4.1f", "" ); - bli_dprintm( "y: intial value", 1, m, y, m, 1, "%4.1f", "" ); - - // y := beta * y + alpha * a * x, where 'a' is general. - bli_dgemv( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, - m, n, &alpha, a, rs, cs, x, 1, &beta, y, 1 ); - - bli_dprintm( "y: after gemv", 1, m, y, m, 1, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( x ); - free( y ); - - - // - // Example 3: Perform a symmetric rank-1 update (syr) operation. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 5; rs = 1; cs = 5; - a = static_cast(malloc( m * m * sizeof( double ) )); - x = static_cast(malloc( 1 * m * sizeof( double ) )); - - // Set alpha. - alpha = 1.0; - - // Initialize vector 'x'. - bli_drandv( m, x, 1 ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &zero, a, rs, cs ); - - // Randomize the lower triangle of 'a'. - bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs ); - - bli_dprintm( "x: set to random values", 1, m, x, m, 1, "%4.1f", "" ); - bli_dprintm( "a: initial value (zeros in upper triangle)", m, m, a, 1, m, "%4.1f", "" ); - - // a := a + alpha * x * x^T, where 'a' is symmetric and lower-stored. - bli_dsyr( BLIS_LOWER, BLIS_NO_CONJUGATE, m, &alpha, x, 1, a, rs, cs ); - - bli_dprintm( "a: after syr", m, m, a, 1, m, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( x ); - - - // - // Example 4: Perform a symmetric matrix-vector multiply (symv) operation. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 5;; rs = 1; cs = m; - a = static_cast(malloc( m * m * sizeof( double ) )); - x = static_cast(malloc( 1 * m * sizeof( double ) )); - y = static_cast(malloc( 1 * m * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - beta = 1.0; - - // Initialize vectors 'x' and 'y'. - bli_dsetv( BLIS_NO_CONJUGATE, m, &one, x, 1 ); - bli_dsetv( BLIS_NO_CONJUGATE, m, &zero, y, 1 ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &zero, a, rs, cs ); - - // Randomize 'a'. - bli_drandm( 0, BLIS_UPPER, m, m, a, rs, cs ); - - bli_dprintm( "a: randomized (zeros in lower triangle)", m, m, a, rs, cs, "%4.1f", "" ); - bli_dprintm( "x: set to 1.0", 1, m, x, m, 1, "%4.1f", "" ); - bli_dprintm( "y: intial value", 1, m, y, m, 1, "%4.1f", "" ); - - // y := beta * y + alpha * a * x, where 'a' is symmetric and upper-stored. - bli_dsymv( BLIS_UPPER, (conj_t)BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, - m, &alpha, a, rs, cs, x, 1, &beta, y, 1 ); - - bli_dprintm( "y: after symv", 1, m, y, m, 1, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( x ); - free( y ); - - - // - // Example 5: Perform a triangular matrix-vector multiply (trmv) operation. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 5;; rs = 1; cs = m; - a = static_cast(malloc( m * m * sizeof( double ) )); - x = static_cast(malloc( 1 * m * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - - // Initialize vector 'x'. - bli_dsetv( BLIS_NO_CONJUGATE, m, &one, x, 1 ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &zero, a, rs, cs ); - - // Randomize 'a'. - bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs ); - - bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rs, cs, "%4.1f", "" ); - bli_dprintm( "x: intial value", 1, m, x, m, 1, "%4.1f", "" ); - - // x := alpha * a * x, where 'a' is triangular and lower-stored. - bli_dtrmv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG, - m, &alpha, a, rs, cs, x, 1 ); - - bli_dprintm( "x: after trmv", 1, m, x, m, 1, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( x ); - - - // - // Example 6: Perform a triangular solve (trsv) operation. - // - - printf( "\n#\n# -- Example 6 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 5;; rs = 1; cs = m; - a = static_cast(malloc( m * m * sizeof( double ) )); - b = static_cast(malloc( 1 * m * sizeof( double ) )); - y = static_cast(malloc( 1 * m * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - - // Initialize vector 'x'. - bli_dsetv( BLIS_NO_CONJUGATE, m, &one, b, 1 ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &zero, a, rs, cs ); - - // Randomize 'a'. - bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs ); - - // Load the diagonal. By setting the diagonal to something of greater - // absolute value than the off-diagonal elements, we increase the odds - // that the matrix is not singular (singular matrices have no inverse). - bli_dshiftd( 0, m, m, &two, a, rs, cs ); - - bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rs, cs, "%4.1f", "" ); - bli_dprintm( "b: intial value", 1, m, b, m, 1, "%4.1f", "" ); - - // x := alpha * a * x, where 'a' is triangular and lower-stored. - bli_dtrsv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG, - m, &alpha, a, rs, cs, x, 1 ); - - bli_dprintm( "b: after trsv", 1, m, b, m, 1, "%4.1f", "" ); - - // We can confirm the solution by comparing the product of a and x to the - // original value of b. - bli_dcopyv( (conj_t)BLIS_NO_TRANSPOSE, m, b, 1, y, 1 ); - bli_dtrmv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG, - m, &alpha, a, rs, cs, y, 1 ); - - bli_dprintm( "y: should equal initial value of b", 1, m, y, m, 1, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( b ); - free( y ); - - - return hpx::finalize(); -} - -int main(int argc, char ** argv) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/tapi/04level3.cpp b/examples/hpx/tapi/04level3.cpp deleted file mode 100644 index 8a3bb84131..0000000000 --- a/examples/hpx/tapi/04level3.cpp +++ /dev/null @@ -1,350 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - dim_t m, n, k; - inc_t rsa, csa; - inc_t rsb, csb; - inc_t rsc, csc; - - double* a; - double* b; - double* c; - double alpha, beta; - - // Initialize some basic constants. - double zero = 0.0; - double one = 1.0; - double two = 2.0; - - - // - // This file demonstrates level-3 operations. - // - - - // - // Example 1: Perform a general matrix-matrix multiply (gemm) operation. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 4; n = 5; k = 3; - rsc = 1; csc = m; - rsa = 1; csa = m; - rsb = 1; csb = k; - c = static_cast(malloc( m * n * sizeof( double ) )); - a = static_cast(malloc( m * k * sizeof( double ) )); - b = static_cast(malloc( k * n * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - beta = 1.0; - - // Initialize the matrix operands. - bli_drandm( 0, BLIS_DENSE, m, k, a, rsa, csa ); - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - k, n, &one, b, rsb, csb ); - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &zero, c, rsc, csc ); - - bli_dprintm( "a: randomized", m, k, a, rsa, csa, "%4.1f", "" ); - bli_dprintm( "b: set to 1.0", k, n, b, rsb, csb, "%4.1f", "" ); - bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" ); - - // c := beta * c + alpha * a * b, where 'a', 'b', and 'c' are general. - bli_dgemm( BLIS_NO_TRANSPOSE, BLIS_NO_TRANSPOSE, - m, n, k, &alpha, a, rsa, csa, b, rsb, csb, - &beta, c, rsc, csc ); - - bli_dprintm( "c: after gemm", m, n, c, rsc, csc, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( b ); - free( c ); - - - // - // Example 1b: Perform a general matrix-matrix multiply (gemm) operation - // with the left input operand (matrix A) transposed. - // - - printf( "\n#\n# -- Example 1b --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 4; n = 5; k = 3; - rsc = 1; csc = m; - rsa = 1; csa = k; - rsb = 1; csb = k; - c = static_cast(malloc( m * n * sizeof( double ) )); - a = static_cast(malloc( k * m * sizeof( double ) )); - b = static_cast(malloc( k * n * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - beta = 1.0; - - // Initialize the matrix operands. - bli_drandm( 0, BLIS_DENSE, k, m, a, rsa, csa ); - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - k, n, &one, b, rsb, csb ); - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &zero, c, rsc, csc ); - - bli_dprintm( "a: randomized", k, m, a, rsa, csa, "%4.1f", "" ); - bli_dprintm( "b: set to 1.0", k, n, b, rsb, csb, "%4.1f", "" ); - bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" ); - - // c := beta * c + alpha * a^T * b, where 'a', 'b', and 'c' are general. - bli_dgemm( BLIS_TRANSPOSE, BLIS_NO_TRANSPOSE, - m, n, k, &alpha, a, rsa, csa, b, rsb, csb, - &beta, c, rsc, csc ); - - bli_dprintm( "c: after gemm", m, n, c, rsc, csc, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( b ); - free( c ); - - - // - // Example 2: Perform a symmetric rank-k update (syrk) operation. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 5; k = 3; - rsc = 1; csc = m; - rsa = 1; csa = m; - c = static_cast(malloc( m * m * sizeof( double ) )); - a = static_cast(malloc( m * k * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - - // Initialize the matrix operands. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &zero, c, rsc, csc ); - bli_drandm( 0, BLIS_DENSE, m, k, a, rsa, csa ); - - // Randomize the lower triangle of 'c'. - bli_drandm( 0, BLIS_LOWER, m, n, c, rsc, csc ); - - bli_dprintm( "a: set to random values", m, k, a, rsa, csa, "%4.1f", "" ); - bli_dprintm( "c: initial value (zeros in upper triangle)", m, m, c, rsc, csc, "%4.1f", "" ); - - // c := c + alpha * a * a^T, where 'c' is symmetric and lower-stored. - bli_dsyrk( BLIS_LOWER, BLIS_NO_TRANSPOSE, - m, k, &alpha, a, rsa, csa, - &beta, c, rsc, csc ); - - bli_dprintm( "c: after syrk", m, m, c, rsc, csc, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( c ); - - - // - // Example 3: Perform a symmetric matrix-matrix multiply (symm) operation. - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 5; n = 6; - rsc = 1; csc = m; - rsa = 1; csa = m; - rsb = 1; csb = m; - c = static_cast(malloc( m * n * sizeof( double ) )); - a = static_cast(malloc( m * m * sizeof( double ) )); - b = static_cast(malloc( m * n * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - beta = 1.0; - - // Initialize matrices 'b' and 'c'. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &one, b, rsb, csb ); - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &zero, c, rsc, csc ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &zero, a, rsa, csa ); - - // Randomize the upper triangle of 'a'. - bli_drandm( 0, BLIS_UPPER, m, m, a, rsa, csa ); - - bli_dprintm( "a: randomized (zeros in lower triangle)", m, m, a, rsa, csa, "%4.1f", "" ); - bli_dprintm( "b: set to 1.0", m, n, b, rsb, csb, "%4.1f", "" ); - bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" ); - - // c := beta * c + alpha * a * b, where 'a' is symmetric and upper-stored. - bli_dsymm( BLIS_LEFT, BLIS_UPPER, BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, - m, n, &alpha, a, rsa, csa, b, rsb, csb, - &beta, c, rsc, csc ); - - bli_dprintm( "c: after symm", m, n, c, rsc, csc, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( b ); - free( c ); - - - // - // Example 4: Perform a triangular matrix-matrix multiply (trmm) operation. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 5; n = 4; - rsa = 1; csa = m; - rsb = 1; csb = m; - a = static_cast(malloc( m * m * sizeof( double ) )); - b = static_cast(malloc( m * n * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - - // Initialize matrix 'b'. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &one, b, rsb, csb ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &zero, a, rsa, csa ); - - // Randomize the lower triangle of 'a'. - bli_drandm( 0, BLIS_LOWER, m, m, a, rsa, csa ); - - bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rsa, csa, "%4.1f", "" ); - bli_dprintm( "b: initial value", m, n, b, rsb, csb, "%4.1f", "" ); - - // b := alpha * a * b, where 'a' is triangular and lower-stored. - bli_dtrmm( BLIS_LEFT, BLIS_LOWER, (trans_t)BLIS_NONUNIT_DIAG, (diag_t)BLIS_NO_TRANSPOSE, - m, n, &alpha, a, rsa, csa, b, rsb, csb ); - - bli_dprintm( "b: after trmm", m, n, b, rsb, csb, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( b ); - - - // - // Example 5: Perform a triangular solve with multiple right-hand sides - // (trsm) operation. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // Create some matrix and vector operands to work with. - m = 5; n = 4; - rsa = 1; csa = m; - rsb = 1; csb = m; - rsc = 1; csc = m; - a = static_cast(malloc( m * m * sizeof( double ) )); - b = static_cast(malloc( m * n * sizeof( double ) )); - c = static_cast(malloc( m * n * sizeof( double ) )); - - // Set the scalars to use. - alpha = 1.0; - - // Initialize matrix 'b'. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, &one, b, rsb, csb ); - - // Zero out all of matrix 'a'. This is optional, but will avoid possibly - // displaying junk values in the unstored triangle. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &zero, a, rsa, csa ); - - // Randomize the lower triangle of 'a'. - bli_drandm( 0, BLIS_LOWER, m, m, a, rsa, csa ); - - // Load the diagonal. By setting the diagonal to something of greater - // absolute value than the off-diagonal elements, we increase the odds - // that the matrix is not singular (singular matrices have no inverse). - bli_dshiftd( 0, m, m, &two, a, rsa, csa ); - - bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rsa, csa, "%4.1f", "" ); - bli_dprintm( "b: initial value", m, n, b, rsb, csb, "%4.1f", "" ); - - // solve a * x = alpha * b, where 'a' is triangular and lower-stored, and - // overwrite b with the solution matrix x. - bli_dtrsm( BLIS_LEFT, BLIS_LOWER, (trans_t)BLIS_NONUNIT_DIAG, (diag_t)BLIS_NO_TRANSPOSE, - m, n, &alpha, a, rsa, csa, b, rsb, csb ); - - bli_dprintm( "b: after trmm", m, n, b, rsb, csb, "%4.1f", "" ); - - // We can confirm the solution by comparing the product of a and x to the - // original value of b. - bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, - m, n, b, rsb, csb, c, rsc, csc ); - bli_dtrmm( BLIS_LEFT, BLIS_LOWER, (trans_t)BLIS_NONUNIT_DIAG, (diag_t)BLIS_NO_TRANSPOSE, - m, n, &alpha, a, rsa, csa, c, rsc, csc ); - - bli_dprintm( "c: should equal initial value of b", m, n, c, rsc, csc, "%4.1f", "" ); - - // Free the memory obtained via malloc(). - free( a ); - free( b ); - free( c ); - - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- -int main(int argc, char** argv) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/tapi/05util.cpp b/examples/hpx/tapi/05util.cpp deleted file mode 100644 index 8bce336fca..0000000000 --- a/examples/hpx/tapi/05util.cpp +++ /dev/null @@ -1,288 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#include "blis.h" - -#include -#include - -int hpx_main( int argc, char** argv ) -{ - double* x; - dcomplex* y; - double* a; - dcomplex* b; - double* c; - double* d; - dcomplex* e; - dcomplex* f; - double* g; - double norm1, normi, normf; - dim_t m, n; - inc_t rs, cs; - - // Initialize some basic constants. - double minus_one = -1.0; - dcomplex minus_one_z = { -1.0, 0.0 }; - - - // - // This file demonstrates working with vector and matrices in the - // context of various utility operations. - // - - - // - // Example 1: Compute various vector norms. - // - - printf( "\n#\n# -- Example 1 --\n#\n\n" ); - - // Create a few matrices to work with. - m = 1; n = 5; rs = 5; cs = 1; - x = static_cast(malloc( m * n * sizeof( double ) )); - y = static_cast(malloc( m * n * sizeof( dcomplex ) )); - - // Initialize the vectors to random values. - bli_drandv( n, x, 1 ); - bli_zrandv( n, y, 1 ); - - bli_dprintm( "x", m, n, x, rs, cs, "%4.1f", "" ); - - // Compute the one, infinity, and frobenius norms of 'x'. Note that when - // computing the norm alpha of a vector 'x', the datatype of alpha must be - // equal to the real projection of the datatype of 'x'. - bli_dnorm1v( n, x, 1, &norm1 ); - bli_dnormiv( n, x, 1, &normi ); - bli_dnormfv( n, x, 1, &normf ); - - bli_dprintm( "x: 1-norm:", 1, 1, &norm1, rs, cs, "%4.1f", "" ); - bli_dprintm( "x: infinity norm:", 1, 1, &normi, rs, cs, "%4.1f", "" ); - bli_dprintm( "x: frobenius norm:", 1, 1, &normf, rs, cs, "%4.1f", "" ); - - bli_zprintm( "y", m, n, y, rs, cs, "%4.1f", "" ); - - // Compute the one, infinity, and frobenius norms of 'y'. Note that we - // can reuse the same scalars from before for computing norms of - // dcomplex matrices, since the real projection of dcomplex is double. - bli_znorm1v( n, y, 1, &norm1 ); - bli_znormiv( n, y, 1, &normi ); - bli_znormfv( n, y, 1, &normf ); - - bli_dprintm( "y: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); - bli_dprintm( "y: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); - bli_dprintm( "y: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); - - - // - // Example 2: Compute various matrix norms. - // - - printf( "\n#\n# -- Example 2 --\n#\n\n" ); - - // Create a few matrices to work with. - m = 5; n = 6; rs = 1; cs = m; - a = static_cast(malloc( m * n * sizeof( double ) )); - b = static_cast(malloc( m * n * sizeof( dcomplex ) )); - - // Initialize the matrices to random values. - bli_drandm( 0, BLIS_DENSE, m, n, a, rs, cs ); - bli_zrandm( 0, BLIS_DENSE, m, n, b, rs, cs ); - - bli_dprintm( "a:", m, n, a, rs, cs, "%4.1f", "" ); - - // Compute the one-norm of 'a'. - bli_dnorm1m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, a, rs, cs, &norm1 ); - bli_dnormim( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, a, rs, cs, &normi ); - bli_dnormfm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, a, rs, cs, &normf ); - - bli_dprintm( "a: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); - bli_dprintm( "a: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); - bli_dprintm( "a: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); - - bli_zprintm( "b:", m, n, b, rs, cs, "%4.1f", "" ); - - // Compute the one-norm of 'b'. - bli_znorm1m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, b, rs, cs, &norm1 ); - bli_znormim( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, b, rs, cs, &normi ); - bli_znormfm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, n, b, rs, cs, &normf ); - - bli_dprintm( "a: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); - bli_dprintm( "a: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); - bli_dprintm( "a: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); - - - // - // Example 3: Make a real matrix explicitly symmetric (or Hermitian). - // - - printf( "\n#\n# -- Example 3 --\n#\n\n" ); - - // Create a few matrices to work with. - m = 4; n = 4; rs = 1; cs = m; - c = static_cast(malloc( m * m * sizeof( double ) )); - d = static_cast(malloc( m * m * sizeof( double ) )); - - // Initialize all of 'c' to -1.0 to simulate junk values. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &minus_one, c, rs, cs ); - - // Randomize the lower triangle of 'c'. - bli_drandm( 0, BLIS_LOWER, m, m, c, rs, cs ); - - bli_dprintm( "c (initial state):", m, m, c, rs, cs, "%4.1f", "" ); - - // mksymm on a real matrix transposes the stored triangle into the - // unstored triangle, making the matrix densely symmetric. - bli_dmksymm( BLIS_LOWER, m, c, rs, cs ); - - bli_dprintm( "c (after mksymm on lower triangle):", m, m, c, rs, cs, "%4.1f", "" ); - - // Digression: Most people think only of complex matrices as being able - // to be complex. However, in BLIS, we define Hermitian operations on - // real matrices, too--they are simply equivalent to the corresponding - // symmetric operation. For example, when we make a real matrix explicitly - // Hermitian, the result is indistinguishable from making it symmetric. - - // Initialize all of 'd' to -1.0 to simulate junk values. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &minus_one, d, rs, cs ); - - // Randomize the lower triangle of 'd'. - bli_drandm( 0, BLIS_LOWER, m, m, d, rs, cs ); - - bli_dprintm( "d (initial state):", m, m, d, rs, cs, "%4.1f", "" ); - - // mkherm on a real matrix behaves the same as mksymm, as there are no - // imaginary elements to conjugate. - bli_dmkherm( BLIS_LOWER, m, d, rs, cs ); - - bli_dprintm( "c (after mkherm on lower triangle):", m, m, d, rs, cs, "%4.1f", "" ); - - - // - // Example 4: Make a complex matrix explicitly symmetric or Hermitian. - // - - printf( "\n#\n# -- Example 4 --\n#\n\n" ); - - // Create a few matrices to work with. - m = 4; n = 4; rs = 1; cs = m; - e = static_cast(malloc( m * m * sizeof( dcomplex ) )); - f = static_cast(malloc( m * m * sizeof( dcomplex ) )); - - // Initialize all of 'e' to -1.0 to simulate junk values. - bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &minus_one_z, e, rs, cs ); - - // Randomize the upper triangle of 'e'. - bli_zrandm( 0, BLIS_UPPER, m, m, e, rs, cs ); - - bli_zprintm( "e (initial state):", m, m, e, rs, cs, "%4.1f", "" ); - - // mksymm on a complex matrix transposes the stored triangle into the - // unstored triangle. - bli_zmksymm( BLIS_UPPER, m, e, rs, cs ); - - bli_zprintm( "e (after mksymm on lower triangle):", m, m, e, rs, cs, "%4.1f", "" ); - - // Initialize all of 'f' to -1.0 to simulate junk values. - bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &minus_one_z, f, rs, cs ); - - // Randomize the upper triangle of 'd'. - bli_zrandm( 0, BLIS_UPPER, m, m, f, rs, cs ); - - bli_zprintm( "f (initial state):", m, m, f, rs, cs, "%4.1f", "" ); - - // mkherm on a real matrix behaves the same as mksymm, as there are no - // imaginary elements to conjugate. - bli_zmkherm( BLIS_UPPER, m, f, rs, cs ); - - bli_zprintm( "f (after mkherm on lower triangle):", m, m, f, rs, cs, "%4.1f", "" ); - - - // - // Example 5: Make a real matrix explicitly triangular. - // - - printf( "\n#\n# -- Example 5 --\n#\n\n" ); - - // Create a few matrices to work with. - m = 5; n = 5; rs = 1; cs = m; - g = static_cast(malloc( m * m * sizeof( double ) )); - - // Initialize all of 'g' to -1.0 to simulate junk values. - bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, - m, m, &minus_one, g, rs, cs ); - - // Randomize the lower triangle of 'g'. - bli_drandm( 0, BLIS_LOWER, m, m, g, rs, cs ); - - bli_dprintm( "g (initial state):", m, m, g, rs, cs, "%4.1f", "" ); - - // mktrim does not explicitly copy any data, since presumably the stored - // triangle already contains the data of interest. However, mktrim does - // explicitly writes zeros to the unstored region. - bli_dmktrim( BLIS_LOWER, m, g, rs, cs ); - - bli_dprintm( "g (after mktrim):", m, m, g, rs, cs, "%4.1f", "" ); - - - // Free the memory obtained via malloc(). - free( x ); - free( y ); - free( a ); - free( b ); - free( c ); - free( d ); - free( e ); - free( f ); - free( g ); - - return hpx::finalize(); -} - -// ----------------------------------------------------------------------------- -int main(int argc, char ** argv) { - return hpx::init(argc, argv); -} diff --git a/examples/hpx/tapi/Makefile b/examples/hpx/tapi/Makefile deleted file mode 100644 index 67c129c2a1..0000000000 --- a/examples/hpx/tapi/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -# BLIS -# An object-based framework for developing high-performance BLAS-like -# libraries. -# -# Copyright (C) 2014, The University of Texas at Austin -# Copyright (C) 2022 Tactical Computing Laboratories, LLC -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# - Neither the name of The University of Texas nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# --include $(SHARE_PATH)/common.mk - -all: - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=00level1v -o 00level1v 00level1v.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=01level1m -o 01level1m 01level1m.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=02level1m_diag -o 02level1m_diag 02level1m_diag.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=03level2 -o 03level2 03level2.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=04level3 -o 04level3 04level3.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - $(CXX) -O3 -I../../../include/x86_64 `pkg-config --cflags hpx_application` -DHPX_APPLICATION_NAME=05util -o 05util 05util.cpp -L../../../lib/x86_64 `pkg-config --libs hpx_application` -lblis - -clean: - rm 00level1v 01level1m 02level1m_diag 03level2 04level3 05util From c5a3bbfc5e2b8fabf0ebe157ebbd94038fde34ef Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 4 Nov 2022 16:03:02 -0500 Subject: [PATCH 13/21] Remove unnecessary include. --- config/generic/bli_cntx_init_generic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/config/generic/bli_cntx_init_generic.c b/config/generic/bli_cntx_init_generic.c index 287c5ef3e0..70bbb80f96 100644 --- a/config/generic/bli_cntx_init_generic.c +++ b/config/generic/bli_cntx_init_generic.c @@ -33,7 +33,6 @@ */ #include "blis.h" -#include "bli_type_defs.h" void bli_cntx_init_generic( cntx_t* cntx ) { From 284a1429df9d74a4ca65b1ad7c42dfe20843faf6 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 4 Nov 2022 16:16:13 -0500 Subject: [PATCH 14/21] Clean up copyright headers. Copyrights should only be added when a file is newly added or substantially re-written. --- Makefile | 1 - build/bli_config.h.in | 1 - build/config.mk.in | 1 - common.mk | 1 - configure | 1 - frame/include/bli_type_defs.h | 1 - frame/thread/bli_thrcomm.h | 1 - frame/thread/bli_thrcomm_hpx.h | 1 - frame/thread/bli_thrcomm_hpx_impl.cpp | 3 +-- frame/thread/bli_thrcomm_hpx_impl.hpp | 1 - frame/thread/bli_thread.c | 1 - frame/thread/bli_thread_hpx.h | 1 - frame/thread/bli_thread_hpx_impl.cpp | 1 - frame/thread/bli_thread_hpx_impl.hpp | 1 - 14 files changed, 1 insertion(+), 15 deletions(-) diff --git a/Makefile b/Makefile index f7515ddd92..87356f9978 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,6 @@ # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2022, Advanced Micro Devices, Inc. -# Copyright (C) 2022 Tactical Computing Laboratories, LLC # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/build/bli_config.h.in b/build/bli_config.h.in index 0b9cba1412..716b6e22fc 100644 --- a/build/bli_config.h.in +++ b/build/bli_config.h.in @@ -6,7 +6,6 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. - Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/build/config.mk.in b/build/config.mk.in index 46d349add5..4624220cf0 100644 --- a/build/config.mk.in +++ b/build/config.mk.in @@ -6,7 +6,6 @@ # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2022, Advanced Micro Devices, Inc. -# Copyright (C) 2022 Tactical Computing Laboratories, LLC # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/common.mk b/common.mk index db32e050df..2b7d18b03f 100644 --- a/common.mk +++ b/common.mk @@ -5,7 +5,6 @@ # libraries. # # Copyright (C) 2014, The University of Texas at Austin -# Copyright (C) 2022 Tactical Computing Laboratories, LLC # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/configure b/configure index 4f0209a157..ee548c3eac 100755 --- a/configure +++ b/configure @@ -6,7 +6,6 @@ # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2020-2022, Advanced Micro Devices, Inc. -# Copyright (C) 2022 Tactical Computing Laboratories, LLC # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 436536f3b8..014be18b77 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -7,7 +7,6 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2020, Advanced Micro Devices, Inc. - Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thrcomm.h b/frame/thread/bli_thrcomm.h index da9487b37f..86b81e8c48 100644 --- a/frame/thread/bli_thrcomm.h +++ b/frame/thread/bli_thrcomm.h @@ -6,7 +6,6 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. - Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thrcomm_hpx.h b/frame/thread/bli_thrcomm_hpx.h index 73bcbcd1ef..efef7dacc9 100644 --- a/frame/thread/bli_thrcomm_hpx.h +++ b/frame/thread/bli_thrcomm_hpx.h @@ -4,7 +4,6 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without diff --git a/frame/thread/bli_thrcomm_hpx_impl.cpp b/frame/thread/bli_thrcomm_hpx_impl.cpp index 7268c52cbf..ec6f54a3e9 100644 --- a/frame/thread/bli_thrcomm_hpx_impl.cpp +++ b/frame/thread/bli_thrcomm_hpx_impl.cpp @@ -4,7 +4,6 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without @@ -49,7 +48,7 @@ extern "C" { void bli_thrcomm_init_hpx( dim_t n_threads, thrcomm_t* comm ) { if ( comm == NULL ) return; - comm->barrier = new hpx:barrier<>(); + comm->barrier = new hpx:barrier<>(); } void bli_thrcomm_cleanup_hpx( thrcomm_t* comm ) diff --git a/frame/thread/bli_thrcomm_hpx_impl.hpp b/frame/thread/bli_thrcomm_hpx_impl.hpp index b12f54cdd2..ea6330349c 100644 --- a/frame/thread/bli_thrcomm_hpx_impl.hpp +++ b/frame/thread/bli_thrcomm_hpx_impl.hpp @@ -4,7 +4,6 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without diff --git a/frame/thread/bli_thread.c b/frame/thread/bli_thread.c index c5df371086..4a109138f9 100644 --- a/frame/thread/bli_thread.c +++ b/frame/thread/bli_thread.c @@ -6,7 +6,6 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. - Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/frame/thread/bli_thread_hpx.h b/frame/thread/bli_thread_hpx.h index 336b16e65a..406c388904 100644 --- a/frame/thread/bli_thread_hpx.h +++ b/frame/thread/bli_thread_hpx.h @@ -4,7 +4,6 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without diff --git a/frame/thread/bli_thread_hpx_impl.cpp b/frame/thread/bli_thread_hpx_impl.cpp index 1922fcc664..4ed0ec1c76 100644 --- a/frame/thread/bli_thread_hpx_impl.cpp +++ b/frame/thread/bli_thread_hpx_impl.cpp @@ -4,7 +4,6 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without diff --git a/frame/thread/bli_thread_hpx_impl.hpp b/frame/thread/bli_thread_hpx_impl.hpp index b08e6c0d5b..cc91f928cf 100644 --- a/frame/thread/bli_thread_hpx_impl.hpp +++ b/frame/thread/bli_thread_hpx_impl.hpp @@ -4,7 +4,6 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2022 Tactical Computing Laboratories, LLC Redistribution and use in source and binary forms, with or without From 6015a76c87ef31a27383dae449c40ad496b0ee39 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 4 Nov 2022 23:12:23 -0500 Subject: [PATCH 15/21] Adjustments to streamline HPX config/make. Also, fixes for macOS. --- Makefile | 2 +- common.mk | 55 ++++++++++++++++++++++++++----------------------------- configure | 1 + 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 87356f9978..6dd48f9090 100644 --- a/Makefile +++ b/Makefile @@ -554,7 +554,7 @@ else endif $(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.cpp $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) -ifeq ($(ENABLE_HPX),yes) +ifneq ($(findstring hpx,$(THREADING_MODEL)),) ifeq ($(ENABLE_VERBOSE),yes) $(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ else diff --git a/common.mk b/common.mk index 2b7d18b03f..05fd1ad73d 100644 --- a/common.mk +++ b/common.mk @@ -427,8 +427,6 @@ KERNELS_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(KERNELS_DIR) ADDON_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(ADDON_DIR) SANDBOX_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(SANDBOX_DIR) -ENABLE_HPX := no - # # --- Library name and local paths --------------------------------------------- @@ -689,8 +687,12 @@ endif # --- Linker program --- -# Use whatever compiler was chosen. +# Use whatever compiler was chosen. A C++ compiler must be used if HPX is enabled. +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +LINKER := $(CXX) +else LINKER := $(CC) +endif # --- Warning flags --- @@ -800,14 +802,22 @@ endif CLANGFLAGS := -std=c99 $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CLANGFLAGS,$(c)))) -# Enable C++11. +# Enable C++11, or C++17 if HPX threading is enabled. +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +CXXLANGFLAGS := -std=c++17 +else CXXLANGFLAGS := -std=c++11 +endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CXXLANGFLAGS,$(c)))) # --- C Preprocessor flags --- # Enable clock_gettime() in time.h. CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L +# Enable ip_mreq on macOS which is needed for ASIO which is needed for HPX +ifeq ($(OS_NAME),Darwin) +CPPROCFLAGS += -D_DARWIN_C_SOURCE +endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CPPROCFLAGS,$(c)))) # --- AddressSanitizer flags --- @@ -839,16 +849,6 @@ ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif -ifneq ($(findstring hpx,$(THREADING_MODEL)),) -ENABLE_HPX := yes -ifneq ($(findstring yes,$(ENABLE_DEBUG)),) -CXXTHREADFLAGS += `pkg-config --cflags hpx_component` -LDFLAGS += `pkg-config --libs hpx_component` -else -CXXTHREADFLAGS += `pkg-config --cflags hpx_component_debug` -LDFLAGS += `pkg-config --libs hpx_component_debug` -endif -endif endif ifeq ($(CC_VENDOR),icc) @@ -863,16 +863,6 @@ ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif -ifneq ($(findstring hpx,$(THREADING_MODEL)),) -ENABLE_HPX := yes -ifneq ($(findstring yes,$(ENABLE_DEBUG)),) -CXXTHREADFLAGS += `pkg-config --cflags hpx_component` -LDFLAGS += `pkg-config --libs hpx_component` -else -CXXTHREADFLAGS += `pkg-config --cflags hpx_component_debug` -LDFLAGS += `pkg-config --libs hpx_component_debug` -endif -endif endif ifeq ($(CC_VENDOR),clang) @@ -887,15 +877,22 @@ ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif +endif + +# Threading flags for HPX ifneq ($(findstring hpx,$(THREADING_MODEL)),) -ENABLE_HPX := yes ifneq ($(findstring yes,$(ENABLE_DEBUG)),) -CXXTHREADFLAGS += `pkg-config --cflags hpx_component` -LDFLAGS += `pkg-config --libs hpx_component` +HPX_CXXFLAGS := $(shell pkg-config --cflags hpx_component_debug) +HPX_LDFLAGS := $(filter-out -shared,$(shell pkg-config --libs hpx_component_debug)) else -CXXTHREADFLAGS += `pkg-config --cflags hpx_component_debug` -LDFLAGS += `pkg-config --libs hpx_component_debug` +HPX_CXXFLAGS := $(shell pkg-config --cflags hpx_component) +HPX_LDFLAGS := $(filter-out -shared,$(shell pkg-config --libs hpx_component)) endif +CTHREADFLAGS += $(filter-out -std=%,$(HPX_CXXFLAGS)) +LDFLAGS += $(HPX_LDFLAGS) +ifeq ($(OS_NAME),Darwin) +RPATH_PREFIX := -Wl,-rpath, +LDFLAGS += $(patsubst -L%,$(RPATH_PREFIX)%,$(filter -L%,$(HPX_LDFLAGS))) endif endif diff --git a/configure b/configure index ee548c3eac..eac58433be 100755 --- a/configure +++ b/configure @@ -4095,6 +4095,7 @@ main() | sed -e "s/@ldflags_preset@/${ldflags_preset_esc}/g" \ | sed -e "s/@enable_asan@/${enable_asan}/g" \ | sed -e "s/@debug_type@/${debug_type}/g" \ + | sed -e "s/@enable_debug@/${enable_debug}/g" \ | sed -e "s/@enable_system@/${enable_system}/g" \ | sed -e "s/@threading_model@/${threading_model}/g" \ | sed -e "s/@prefix@/${prefix_esc}/g" \ From 3291fdfacac357d4dc7965074de2554ee9c794c2 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 4 Nov 2022 23:12:44 -0500 Subject: [PATCH 16/21] Simplify the HPX threading implementation. --- ...rcomm_hpx_impl.cpp => bli_thrcomm_hpx.cpp} | 30 ++-- frame/thread/bli_thrcomm_hpx.h | 6 +- ...hrcomm_hpx_impl.hpp => bli_thread_hpx.cpp} | 42 ++++-- frame/thread/bli_thread_hpx.h | 9 +- frame/thread/bli_thread_hpx_impl.cpp | 128 ------------------ frame/thread/bli_thread_hpx_impl.hpp | 53 -------- 6 files changed, 47 insertions(+), 221 deletions(-) rename frame/thread/{bli_thrcomm_hpx_impl.cpp => bli_thrcomm_hpx.cpp} (86%) rename frame/thread/{bli_thrcomm_hpx_impl.hpp => bli_thread_hpx.cpp} (66%) delete mode 100644 frame/thread/bli_thread_hpx_impl.cpp delete mode 100644 frame/thread/bli_thread_hpx_impl.hpp diff --git a/frame/thread/bli_thrcomm_hpx_impl.cpp b/frame/thread/bli_thrcomm_hpx.cpp similarity index 86% rename from frame/thread/bli_thrcomm_hpx_impl.cpp rename to frame/thread/bli_thrcomm_hpx.cpp index ec6f54a3e9..d9fb258c2d 100644 --- a/frame/thread/bli_thrcomm_hpx_impl.cpp +++ b/frame/thread/bli_thrcomm_hpx.cpp @@ -36,24 +36,22 @@ #ifdef BLIS_ENABLE_HPX -#ifdef BLIS_USE_HPX_BARRIER - -#ifdef __cplusplus extern "C" { -#endif + +#ifdef BLIS_USE_HPX_BARRIER // Define the pthread_barrier_t implementations of the init, cleanup, and // barrier functions. void bli_thrcomm_init_hpx( dim_t n_threads, thrcomm_t* comm ) { - if ( comm == NULL ) return; + if ( comm == nullptr ) return; comm->barrier = new hpx:barrier<>(); } void bli_thrcomm_cleanup_hpx( thrcomm_t* comm ) { - if ( comm == NULL ) return; + if ( comm == nullptr ) return; delete comm->barrier; } @@ -62,24 +60,16 @@ void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm ) comm->barrier->arrive_and_wait(); } -#ifdef __cplusplus -} -#endif - #else -#ifdef __cplusplus -extern "C" { -#endif - -// Define the non-pthread_barrier_t implementations of the init, cleanup, -// and barrier functions. These are the default unless the pthread_barrier_t +// Define the non-hpx::barrier implementations of the init, cleanup, +// and barrier functions. These are the default unless the hpx::barrier // versions are requested at compile-time. void bli_thrcomm_init_hpx( dim_t n_threads, thrcomm_t* comm ) { - if ( comm == NULL ) return; - comm->sent_object = NULL; + if ( comm == nullptr ) return; + comm->sent_object = nullptr; comm->n_threads = n_threads; comm->barrier_sense = 0; comm->barrier_threads_arrived = 0; @@ -94,9 +84,7 @@ void bli_thrcomm_barrier_hpx( dim_t t_id, thrcomm_t* comm ) bli_thrcomm_barrier_atomic( t_id, comm ); } -#ifdef __cplusplus -} -#endif +} // extern "C" #endif diff --git a/frame/thread/bli_thrcomm_hpx.h b/frame/thread/bli_thrcomm_hpx.h index efef7dacc9..d80cd22683 100644 --- a/frame/thread/bli_thrcomm_hpx.h +++ b/frame/thread/bli_thrcomm_hpx.h @@ -35,10 +35,12 @@ #ifndef BLIS_THRCOMM_HPX_H #define BLIS_THRCOMM_HPX_H -// Define these prototypes for situations when POSIX multithreading is enabled. +// Define these prototypes for situations when HPX multithreading is enabled. #ifdef BLIS_ENABLE_HPX -#include "bli_thrcomm_hpx_impl.hpp" +void bli_thrcomm_init_hpx( dim_t nt, thrcomm_t* comm ); +void bli_thrcomm_cleanup_hpx( thrcomm_t* comm ); +void bli_thrcomm_barrier_hpx( dim_t tid, thrcomm_t* comm ); #endif diff --git a/frame/thread/bli_thrcomm_hpx_impl.hpp b/frame/thread/bli_thread_hpx.cpp similarity index 66% rename from frame/thread/bli_thrcomm_hpx_impl.hpp rename to frame/thread/bli_thread_hpx.cpp index ea6330349c..226383bd2c 100644 --- a/frame/thread/bli_thrcomm_hpx_impl.hpp +++ b/frame/thread/bli_thread_hpx.cpp @@ -32,26 +32,38 @@ */ -#ifndef BLIS_THRCOMM_HPX_IMPL_H -#define BLIS_THRCOMM_HPX_IMPL_H +#include "blis.h" -// Define these prototypes for situations when POSIX multithreading is enabled. #ifdef BLIS_ENABLE_HPX -#ifdef __cplusplus -extern "C" { -#endif +#include +#include -// pthreads-specific function prototypes. -void bli_thrcomm_init_hpx( dim_t nt, thrcomm_t* comm ); -void bli_thrcomm_cleanup_hpx( thrcomm_t* comm ); -void bli_thrcomm_barrier_hpx( dim_t tid, thrcomm_t* comm ); +extern "C" +void bli_thread_launch_hpx + ( + dim_t n_threads, + thread_func_t func, + const void* params + ) +{ + const timpl_t ti = BLIS_HPX; -#ifdef __cplusplus -} -#endif + // Allocate a global communicator for the root thrinfo_t structures. + pool_t* gl_comm_pool = nullptr; + thrcomm_t* gl_comm = bli_thrcomm_create( ti, gl_comm_pool, n_threads ); -#endif + auto irange = hpx::util::detail::make_counting_shape(n_threads); -#endif + hpx::for_each(hpx::execution::par, hpx::util::begin(irange), hpx::util::end(irange), + [&gl_comm, &func, ¶ms](const dim_t tid) + { + func( gl_comm, tid, params ); + }); + + // Free the global communicator, because the root thrinfo_t node + // never frees its communicator. + bli_thrcomm_free( gl_comm_pool, gl_comm ); +} +#endif diff --git a/frame/thread/bli_thread_hpx.h b/frame/thread/bli_thread_hpx.h index 406c388904..95d5c1fad9 100644 --- a/frame/thread/bli_thread_hpx.h +++ b/frame/thread/bli_thread_hpx.h @@ -35,10 +35,15 @@ #ifndef BLIS_THREAD_HPX_H #define BLIS_THREAD_HPX_H -// Definitions specific to situations when POSIX multithreading is enabled. +// Definitions specific to situations when HPX multithreading is enabled. #ifdef BLIS_ENABLE_HPX -#include "bli_thread_hpx_impl.hpp" +void bli_thread_launch_hpx + ( + dim_t nt, + thread_func_t func, + const void* params + ); #endif diff --git a/frame/thread/bli_thread_hpx_impl.cpp b/frame/thread/bli_thread_hpx_impl.cpp deleted file mode 100644 index 4ed0ec1c76..0000000000 --- a/frame/thread/bli_thread_hpx_impl.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#ifdef BLIS_ENABLE_HPX - -#include -#include - -// A data structure to assist in passing operands to additional threads. -typedef struct thread_data -{ - dim_t tid; - thrcomm_t* gl_comm; - thread_func_t func; - const void* params; -} thread_data_t; - -// Entry point for additional threads -static void* bli_hpx_thread_entry( void* data_void ) -{ - const thread_data_t* data = static_cast(data_void); - - const dim_t tid = data->tid; - thrcomm_t* gl_comm = data->gl_comm; - thread_func_t func = data->func; - const void* params = data->params; - - // Call the thread entry point, passing the global communicator, the - // thread id, and the params struct as arguments. - func( gl_comm, tid, params ); - - return NULL; -} - -#ifdef __cplusplus -extern "C" { -#endif - -void bli_thread_launch_hpx( dim_t n_threads, thread_func_t func, const void* params ) -{ - err_t r_val; - - const timpl_t ti = BLIS_HPX; - - // Allocate a global communicator for the root thrinfo_t structures. - pool_t* gl_comm_pool = NULL; - thrcomm_t* gl_comm = bli_thrcomm_create( ti, gl_comm_pool, n_threads ); - - // Allocate an array of pthread objects and auxiliary data structs to pass - // to the thread entry functions. - - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_l3_thread_decorator().pth: " ); - #endif - - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_l3_thread_decorator().pth: " ); - #endif - thread_data_t* datas = static_cast(bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val )); - - // NOTE: We must iterate backwards so that the chief thread (thread id 0) - // can spawn all other threads before proceeding with its own computation. - auto irange = hpx::util::detail::make_counting_shape(n_threads); - - hpx::for_each(hpx::execution::par, hpx::util::begin(irange), hpx::util::end(irange), - [&datas, &gl_comm, &func, ¶ms](const dim_t tid) { - // Set up thread data for additional threads (beyond thread 0). - datas[tid].tid = tid; - datas[tid].gl_comm = gl_comm; - datas[tid].func = func; - datas[tid].params = params; - - bli_hpx_thread_entry(&datas[0]); - }); - - // Free the global communicator, because the root thrinfo_t node - // never frees its communicator. - bli_thrcomm_free( gl_comm_pool, gl_comm ); - - // Free the array of pthread objects and auxiliary data structs. - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_l3_thread_decorator().pth: " ); - #endif - - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_l3_thread_decorator().pth: " ); - #endif - bli_free_intl( datas ); -} - -#ifdef __cplusplus -} // end extern "C" -#endif - -#endif diff --git a/frame/thread/bli_thread_hpx_impl.hpp b/frame/thread/bli_thread_hpx_impl.hpp deleted file mode 100644 index cc91f928cf..0000000000 --- a/frame/thread/bli_thread_hpx_impl.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2022 Tactical Computing Laboratories, LLC - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_THREAD_HPX_IMPL_H -#define BLIS_THREAD_HPX_IMPL_H - -// Definitions specific to situations when POSIX multithreading is enabled. -#ifdef BLIS_ENABLE_HPX - -#ifdef __cplusplus -extern "C" { -#endif - -void bli_thread_launch_hpx(dim_t n_threads, thread_func_t func, const void* params); - -#ifdef __cplusplus -} -#endif - -#endif - -#endif From 82883cdfe9e8104848a6c54aff0e11c4097a4ce6 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 4 Nov 2022 23:38:38 -0500 Subject: [PATCH 17/21] Adjust documentation for HPX. --- README.md | 2 +- configure | 12 ++++++------ docs/FAQ.md | 4 ++-- docs/Multithreading.md | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 012861366c..7cadb3d0b6 100644 --- a/README.md +++ b/README.md @@ -265,7 +265,7 @@ writing complex kernels. * **Advanced multithreading support.** BLIS allows multiple levels of symmetric multithreading for nearly all level-3 operations. (Currently, users -may choose to obtain parallelism via either OpenMP or POSIX threads). This +may choose to obtain parallelism via OpenMP, POSIX threads, or HPX). This means that matrices may be partitioned in multiple dimensions simultaneously to attain scalable, high-performance parallelism on multicore and many-core architectures. The key to this innovation is a thread-specific control tree diff --git a/configure b/configure index eac58433be..133d0bbe7b 100755 --- a/configure +++ b/configure @@ -170,7 +170,7 @@ print_usage() echo " -t MODEL, --enable-threading[=MODEL], --disable-threading" echo " " echo " Enable threading in the library, using threading model(s)" - echo " MODEL={single,openmp,pthreads,auto}. If multiple values" + echo " MODEL={single,openmp,pthreads,hpx,auto}. If multiple values" echo " are specified within MODEL, they will all be compiled into" echo " BLIS, and the choice of which to use will be determined at" echo " runtime. If the user does not express a preference (by" @@ -3532,7 +3532,6 @@ main() parsed_tm="${parsed_tm} hpx" - elif [ "x${word}" = "xauto" ]; then parsed_tm="${parsed_tm} auto" @@ -3625,11 +3624,12 @@ main() enable_pthreads='yes' enable_pthreads_01=1 - elif [ "x${word}" = "xhpx" ]; then + elif [ "x${word}" = "xhpx" ]; then + + echo "${script_name}: enabling support for threading via hpx." + enable_hpx='yes' + enable_hpx_01=1 - echo "${script_name}: enabling support for threading via hpx." - enable_hpx='yes' - enable_hpx_01=1 fi done diff --git a/docs/FAQ.md b/docs/FAQ.md index 3d0852d36f..aee099b37e 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -115,7 +115,7 @@ For more information on macrokernels, please read our [ACM TOMS papers](https:// As of 0.2.0, BLIS contains a new infrastructure for communicating runtime information (such as kernel addresses and blocksizes) from the highest levels of code all the way down the function stack, even into the kernels themselves. This new data structure is called a *context* (defined in code as a `cntx_t` type), and together with its API it helped us clean up some hacks and other awkwardness that existed in BLIS prior to 0.2.0. Contexts also lay the groundwork for managing kernels and related kernel information at runtime. -If you are a kernel developer, you can usually ignore the `cntx_t*` argument that is passed into each kernel, since the kernels already inherently "know" this information (such as register blocksizes). And if you are a user, and the function you want to call takes a `cntx_t*` argument, you can safely pass in `NULL` and BLIS will automatically build a suitable context for you at runtime. +If you are a kernel developer, you can usually ignore the `cntx_t*` argument that is passed into each kernel, since the kernels already inherently "know" this information (such as register blocksizes). And if you are a user, and the function you want to call takes a `cntx_t*` argument, you can safely pass in `NULL` and BLIS will automatically build a suitable context for you at runtime. ### I'm used to thinking in terms of column-major/row-major storage and leading dimensions. What is a "row stride" / "column stride"? @@ -171,7 +171,7 @@ Originally, BLIS did indeed require the application to explicitly setup (initial ### Does BLIS support multithreading? -Yes! BLIS supports multithreading (via OpenMP or POSIX threads) for all of its level-3 operations. For more information on enabling and controlling multithreading, please see the [Multithreading](Multithreading.md) guide. +Yes! BLIS supports multithreading (via OpenMP, POSIX threads, or HPX) for all of its level-3 operations. For more information on enabling and controlling multithreading, please see the [Multithreading](Multithreading.md) guide. BLIS is also thread-safe so that you can call BLIS from threads within a multithreaded library or application. BLIS derives its thread-safety via unconditional use of features present in POSIX threads (pthreads). These pthreads features are employed for thread-safety regardless of whether BLIS is configured for OpenMP multithreading, pthreads multithreading, or single-threaded execution. diff --git a/docs/Multithreading.md b/docs/Multithreading.md index 933296f794..1a46f65566 100644 --- a/docs/Multithreading.md +++ b/docs/Multithreading.md @@ -246,7 +246,7 @@ This will result in both OpenMP and pthreads implementations being compiled and ```c void bli_thread_set_thread_impl( timpl_t ti ); ``` -The function takes a `timpl_t`, which is an enumerated type that has three valid values corresponding to the three possible threading implementations: `BLIS_OPENMP`, `BLIS_POSIX`, and `BLIS_SINGLE`. Forcing use of pthreads is as simple as calling: +The function takes a `timpl_t`, which is an enumerated type that has three valid values corresponding to the four possible threading implementations: `BLIS_OPENMP`, `BLIS_POSIX`, `BLIS_HPX`, and `BLIS_SINGLE`. Forcing use of pthreads is as simple as calling: ```c bli_thread_set_thread_impl( BLIS_POSIX ) ``` @@ -321,7 +321,7 @@ This will result in both OpenMP and pthreads implementations being compiled and ```c void bli_rntm_set_thread_impl( timpl_t ti, rntm_t* rntm ); ``` -The function takes a `timpl_t`, which is an enumerated type that has three valid values corresponding to the three possible threading implementations: `BLIS_OPENMP`, `BLIS_POSIX`, and `BLIS_SINGLE`. Forcing use of pthreads is as simple as calling: +The function takes a `timpl_t`, which is an enumerated type that has three valid values corresponding to the four possible threading implementations: `BLIS_OPENMP`, `BLIS_POSIX`, `BLIS_HPX`, and `BLIS_SINGLE`. Forcing use of pthreads is as simple as calling: ```c bli_rntm_set_thread_impl( BLIS_POSIX, &rntm ); ``` @@ -366,7 +366,7 @@ Also, you may pass in `NULL` for the `rntm_t*` parameter of an expert interface. This situation could lead to unexpectedly low multithreaded performance. Suppose the user calls `gemm` on a problem with a large m dimension and small k and n dimensions, and explicitly requests parallelism only in the IC loop, but also suppose that the storage of C does not match that of the microkernel's preference. After BLIS transposes the operation internally, the *effective* m dimension will no longer be large; instead, it will be small (because the original m and n dimension will have been swapped). The multithreaded implementation will then proceed to parallelize this small m dimension. There are currently no good *and* easy solutions to this problem. Eventually, though, we plan to add support for two microkernels per datatype per configuration--one for use with matrices C that are row-stored, and one for those that are column-stored. This will obviate the logic within BLIS that sometimes induces the operation transposition, and the problem will go away. - + * **Thread affinity when BLIS and MKL are used together.** Some users have reported that when running a program that links both BLIS (configured with OpenMP) and MKL, **and** when OpenMP thread affinity has been specified (e.g. via `OMP_PROC_BIND` and `OMP_PLACES`), that very poor performance is observed. This may be due to incorrect thread masking, causing all threads to run on one physical core. The exact circumstances leading to this behavior have not been identified, but unsetting the OpenMP thread affinity variables appears to be a solution. # Conclusion From 7a821eea3173555caaa9bf23eae493056c10bb04 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 4 Nov 2022 23:38:51 -0500 Subject: [PATCH 18/21] Add some missing bits for HPX threading. --- frame/3/bli_l3_decor.c | 7 +++++-- frame/base/bli_info.c | 19 ++++++++++++++++++- frame/base/bli_info.h | 2 ++ frame/include/bli_config_macro_defs.h | 10 +++++++++- frame/thread/bli_thrcomm.c | 26 ++++++++++++++++---------- frame/thread/bli_thrcomm.h | 4 ++-- frame/thread/bli_thread.c | 11 +++++------ testsuite/src/test_libblis.c | 16 ++++++++++++---- 8 files changed, 69 insertions(+), 26 deletions(-) diff --git a/frame/3/bli_l3_decor.c b/frame/3/bli_l3_decor.c index e482d37a1a..4160751e6e 100644 --- a/frame/3/bli_l3_decor.c +++ b/frame/3/bli_l3_decor.c @@ -224,13 +224,16 @@ void bli_l3_thread_decorator_check #endif #ifndef BLIS_ENABLE_PTHREADS ti == BLIS_POSIX || +#endif +#ifndef BLIS_ENABLE_HPX + ti == BLIS_HPX || #endif FALSE ) { fprintf( stderr, "\n" ); - fprintf( stderr, "libblis: User requested threading implementation \"%s\", but that method is\n", ( ti == BLIS_OPENMP ? "openmp" : "pthreads" ) ); - fprintf( stderr, "libblis: unavailable. Try reconfiguring BLIS with \"-t %s\" and recompiling.\n", ( ti == BLIS_OPENMP ? "openmp" : "pthreads" ) ); + fprintf( stderr, "libblis: User requested threading implementation \"%s\", but that method is\n", bli_thread_get_thread_impl_str( ti ) ); + fprintf( stderr, "libblis: unavailable. Try reconfiguring BLIS with \"-t %s\" and recompiling.\n", bli_thread_get_thread_impl_str( ti ) ); fprintf( stderr, "libblis: %s: line %d\n", __FILE__, ( int )__LINE__ ); bli_abort(); } diff --git a/frame/base/bli_info.c b/frame/base/bli_info.c index 9d6e181d3f..1f00537d59 100644 --- a/frame/base/bli_info.c +++ b/frame/base/bli_info.c @@ -104,7 +104,8 @@ gint_t bli_info_get_enable_sba_pools( void ) gint_t bli_info_get_enable_threading( void ) { if ( bli_info_get_enable_openmp() || - bli_info_get_enable_pthreads() ) return 1; + bli_info_get_enable_pthreads() || + bli_info_get_enable_hpx() ) return 1; else return 0; } gint_t bli_info_get_enable_openmp( void ) @@ -123,6 +124,14 @@ gint_t bli_info_get_enable_pthreads( void ) return 0; #endif } +gint_t bli_info_get_enable_hpx( void ) +{ +#ifdef BLIS_ENABLE_HPX + return 1; +#else + return 0; +#endif +} gint_t bli_info_get_enable_openmp_as_default( void ) { #ifdef BLIS_ENABLE_OPENMP_AS_DEFAULT @@ -139,6 +148,14 @@ gint_t bli_info_get_enable_pthreads_as_default( void ) return 0; #endif } +gint_t bli_info_get_enable_hpx_as_default( void ) +{ +#ifdef BLIS_ENABLE_HPX_AS_DEFAULT + return 1; +#else + return 0; +#endif +} gint_t bli_info_get_thread_part_jrir_slab( void ) { #ifdef BLIS_ENABLE_JRIR_SLAB diff --git a/frame/base/bli_info.h b/frame/base/bli_info.h index b3514f4341..08a99daea9 100644 --- a/frame/base/bli_info.h +++ b/frame/base/bli_info.h @@ -70,8 +70,10 @@ BLIS_EXPORT_BLIS gint_t bli_info_get_enable_sba_pools( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_threading( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_openmp( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_pthreads( void ); +BLIS_EXPORT_BLIS gint_t bli_info_get_enable_hpx( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_openmp_as_default( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_pthreads_as_default( void ); +BLIS_EXPORT_BLIS gint_t bli_info_get_enable_hpx_as_default( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_thread_part_jrir_slab( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_thread_part_jrir_rr( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_memkind( void ); diff --git a/frame/include/bli_config_macro_defs.h b/frame/include/bli_config_macro_defs.h index 542973b18a..633d7f6717 100644 --- a/frame/include/bli_config_macro_defs.h +++ b/frame/include/bli_config_macro_defs.h @@ -83,12 +83,20 @@ // Default behavior is disabled. #endif +// Enable multithreading via HPX. +#ifdef BLIS_ENABLE_HPX + // No additional definitions needed. +#else + // Default behavior is disabled. +#endif + // Here, we define BLIS_ENABLE_MULTITHREADING if either OpenMP // or pthreads are enabled. This macro is useful in situations when // we want to detect use of either OpenMP or pthreads, or both (as // opposed to neither being used). #if defined ( BLIS_ENABLE_OPENMP ) || \ - defined ( BLIS_ENABLE_PTHREADS ) + defined ( BLIS_ENABLE_PTHREADS ) || \ + defined ( BLIS_ENABLE_HPX ) #define BLIS_ENABLE_MULTITHREADING #endif diff --git a/frame/thread/bli_thrcomm.c b/frame/thread/bli_thrcomm.c index 0547d296e8..3b47977885 100644 --- a/frame/thread/bli_thrcomm.c +++ b/frame/thread/bli_thrcomm.c @@ -74,16 +74,18 @@ static thrcomm_init_ft init_fpa[ BLIS_NUM_THREAD_IMPLS ] = [BLIS_OPENMP] = #if defined(BLIS_ENABLE_OPENMP) bli_thrcomm_init_openmp, -#elif defined(BLIS_ENABLE_PTHREADS) - NULL, #else NULL, #endif [BLIS_POSIX] = #if defined(BLIS_ENABLE_PTHREADS) bli_thrcomm_init_pthreads, -#elif defined(BLIS_ENABLE_OPENMP) +#else NULL, +#endif + [BLIS_HPX] = +#if defined(BLIS_ENABLE_HPX) + bli_thrcomm_init_hpx, #else NULL, #endif @@ -94,16 +96,18 @@ static thrcomm_cleanup_ft cleanup_fpa[ BLIS_NUM_THREAD_IMPLS ] = [BLIS_OPENMP] = #if defined(BLIS_ENABLE_OPENMP) bli_thrcomm_cleanup_openmp, -#elif defined(BLIS_ENABLE_PTHREADS) - NULL, #else NULL, #endif [BLIS_POSIX] = #if defined(BLIS_ENABLE_PTHREADS) bli_thrcomm_cleanup_pthreads, -#elif defined(BLIS_ENABLE_OPENMP) +#else NULL, +#endif + [BLIS_HPX] = +#if defined(BLIS_ENABLE_PTHREADS) + bli_thrcomm_cleanup_hpx, #else NULL, #endif @@ -114,16 +118,18 @@ static thrcomm_barrier_ft barrier_fpa[ BLIS_NUM_THREAD_IMPLS ] = [BLIS_OPENMP] = #if defined(BLIS_ENABLE_OPENMP) bli_thrcomm_barrier_openmp, -#elif defined(BLIS_ENABLE_PTHREADS) - bli_thrcomm_barrier_pthreads, #else bli_thrcomm_barrier_single, #endif [BLIS_POSIX] = #if defined(BLIS_ENABLE_PTHREADS) bli_thrcomm_barrier_pthreads, -#elif defined(BLIS_ENABLE_OPENMP) - bli_thrcomm_barrier_openmp, +#else + bli_thrcomm_barrier_single, +#endif + [BLIS_HPX] = +#if defined(BLIS_ENABLE_PTHREADS) + bli_thrcomm_barrier_hpx, #else bli_thrcomm_barrier_single, #endif diff --git a/frame/thread/bli_thrcomm.h b/frame/thread/bli_thrcomm.h index 86b81e8c48..b65cb0b7a3 100644 --- a/frame/thread/bli_thrcomm.h +++ b/frame/thread/bli_thrcomm.h @@ -94,8 +94,8 @@ typedef struct thrcomm_s #endif #endif - #ifdef BLIS_ENABLE_PTHREADS - #ifdef BLIS_USE_PTHREAD_BARRIER + #ifdef BLIS_ENABLE_HPX + #ifdef BLIS_USE_HPX_BARRIER hpx::barrier<> * barrier; #endif #endif diff --git a/frame/thread/bli_thread.c b/frame/thread/bli_thread.c index 4a109138f9..4cba76b207 100644 --- a/frame/thread/bli_thread.c +++ b/frame/thread/bli_thread.c @@ -61,24 +61,18 @@ static thread_launch_t thread_launch_fpa[ BLIS_NUM_THREAD_IMPLS ] = [BLIS_OPENMP] = #if defined(BLIS_ENABLE_OPENMP) bli_thread_launch_openmp, -#elif defined(BLIS_ENABLE_PTHREADS) - NULL, #else NULL, #endif [BLIS_POSIX] = #if defined(BLIS_ENABLE_PTHREADS) bli_thread_launch_pthreads, -#elif defined(BLIS_ENABLE_OPENMP) - NULL, #else NULL, #endif [BLIS_HPX] = #if defined(BLIS_ENABLE_HPX) bli_thread_launch_hpx, -#elif defined(BLIS_ENABLE_OPENMP) - NULL, #else NULL, #endif @@ -1616,6 +1610,7 @@ static const char* bli_timpl_string[BLIS_NUM_THREAD_IMPLS] = [BLIS_SINGLE] = "single", [BLIS_OPENMP] = "openmp", [BLIS_POSIX] = "pthreads", + [BLIS_HPX] = "hpx", }; const char* bli_thread_get_thread_impl_str( timpl_t ti ) @@ -1725,6 +1720,7 @@ void bli_thread_init_rntm_from_env else if ( !strncmp( ti_env, "pthreads", 8 ) ) ti = BLIS_POSIX; else if ( !strncmp( ti_env, "pthread", 7 ) ) ti = BLIS_POSIX; else if ( !strncmp( ti_env, "posix", 5 ) ) ti = BLIS_POSIX; + else if ( !strncmp( ti_env, "hpx", 3 ) ) ti = BLIS_HPX; else ti = BLIS_SINGLE; #ifdef PRINT_IMPL @@ -1744,6 +1740,9 @@ void bli_thread_init_rntm_from_env #ifdef BLIS_ENABLE_PTHREADS_AS_DEFAULT ti = BLIS_POSIX; #endif + #ifdef BLIS_ENABLE_HPX_AS_DEFAULT + ti = BLIS_HPX; + #endif #ifdef PRINT_IMPL printf( "BLIS_THREAD_IMPL unset; defaulting to BLIS_THREAD_IMPL=%s.\n", diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index aec9357ae9..76c219c312 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -782,26 +782,34 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) const bool has_openmp = bli_info_get_enable_openmp(); const bool has_pthreads = bli_info_get_enable_pthreads(); + const bool has_hpx = bli_info_get_enable_hpx(); const bool openmp_is_def = bli_info_get_enable_openmp_as_default(); const bool pthreads_is_def = bli_info_get_enable_pthreads_as_default(); + const bool hpx_is_def = bli_info_get_enable_hpx_as_default(); const timpl_t ti = bli_thread_get_thread_impl(); // List the available threading implementation(s). - if ( has_openmp && has_pthreads ) sprintf( impl_str, "openmp,pthreads,single" ); - else if ( has_openmp ) sprintf( impl_str, "openmp,single" ); - else if ( has_pthreads ) sprintf( impl_str, "pthreads,single" ); - else sprintf( impl_str, "single only" ); + if ( has_hpx && has_openmp && has_pthreads ) sprintf( impl_str, "openmp,pthreads,hpx,single" ); + else if ( has_hpx && has_openmp ) sprintf( impl_str, "openmp,hpx,single" ); + else if ( has_hpx && has_pthreads ) sprintf( impl_str, "pthreads,hpx,single" ); + else if ( has_hpx ) sprintf( impl_str, "hpx,single" ); + else if ( has_openmp && has_pthreads ) sprintf( impl_str, "openmp,pthreads,single" ); + else if ( has_openmp ) sprintf( impl_str, "openmp,single" ); + else if ( has_pthreads ) sprintf( impl_str, "pthreads,single" ); + else sprintf( impl_str, "single only" ); // Describe the default threading implementation that would be active if // or when BLIS_THREAD_IMPL is unset. if ( openmp_is_def ) sprintf( def_impl_unset_str, "openmp" ); else if ( pthreads_is_def ) sprintf( def_impl_unset_str, "pthreads" ); + else if ( hpx_is_def ) sprintf( def_impl_unset_str, "hpx" ); else sprintf( def_impl_unset_str, "single" ); // Describe the default threading implementation as the testsuite was // currently run. if ( ti == BLIS_OPENMP ) sprintf( def_impl_set_str, "openmp" ); else if ( ti == BLIS_POSIX ) sprintf( def_impl_set_str, "pthreads" ); + else if ( ti == BLIS_HPX ) sprintf( def_impl_set_str, "hpx" ); else sprintf( def_impl_set_str, "single" ); // Describe the status of jrir thread partitioning. From dce876915278b0c1db488d16a36d05c383d14caa Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sat, 5 Nov 2022 00:02:40 -0500 Subject: [PATCH 19/21] Add HPX initialize/finalize to testsuite. Full testsuite passes now with HPX threading. --- blastest/src/cblat1.c | 40 +- blastest/src/cblat2.c | 426 ++++++++++----------- blastest/src/cblat3.c | 602 +++++++++++++++--------------- blastest/src/dblat1.c | 164 ++++---- blastest/src/dblat2.c | 444 +++++++++++----------- blastest/src/dblat3.c | 428 ++++++++++----------- blastest/src/sblat1.c | 180 ++++----- blastest/src/sblat2.c | 354 +++++++++--------- blastest/src/sblat3.c | 380 +++++++++---------- blastest/src/zblat1.c | 48 ++- blastest/src/zblat2.c | 518 +++++++++++++------------- blastest/src/zblat3.c | 636 ++++++++++++++++---------------- common.mk | 5 - configure | 2 +- frame/thread/bli_thrcomm.c | 4 +- frame/thread/bli_thread_hpx.cpp | 16 + frame/thread/bli_thread_hpx.h | 4 + testsuite/src/test_libblis.c | 8 + 18 files changed, 2201 insertions(+), 2058 deletions(-) diff --git a/blastest/src/cblat1.c b/blastest/src/cblat1.c index 6065116628..6562946847 100644 --- a/blastest/src/cblat1.c +++ b/blastest/src/cblat1.c @@ -68,6 +68,11 @@ static real c_b52 = 0.f; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "cblat1"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ static real sfac = 9.765625e-4f; @@ -136,7 +141,12 @@ static real c_b52 = 0.f; } s_stop("", (ftnlen)0); - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ /* Subroutine */ int header_(void) @@ -230,7 +240,7 @@ static real c_b52 = 0.f; complex q__1; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -238,15 +248,15 @@ static real c_b52 = 0.f; integer i__; complex cx[8]; integer np1, len; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), ctest_(integer *, complex *, complex *, complex *, + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), ctest_(integer *, complex *, complex *, complex *, real *); complex mwpcs[5], mwpct[5]; extern real scnrm2_(integer *, complex *, integer *); extern /* Subroutine */ int itest1_(integer *, integer *), stest1_(real *, real *, real *, real *); extern integer icamax_(integer *, complex *, integer *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer *); extern real scasum_(integer *, complex *, integer *); @@ -465,7 +475,7 @@ static real c_b52 = 0.f; complex q__1; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -481,9 +491,9 @@ static real c_b52 = 0.f; #else complex cdotc_( #endif - integer *, complex *, integer + integer *, complex *, integer *, complex *, integer *); - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *); extern /* Complex */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL @@ -491,13 +501,13 @@ complex cdotc_( #else complex cdotu_( #endif - integer *, complex *, integer + integer *, complex *, integer *, complex *, integer *); - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *), ctest_(integer *, complex *, complex *, + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *), ctest_(integer *, complex *, complex *, complex *, real *); integer ksize; - extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, integer *, complex *, integer *); /* Fortran I/O blocks */ @@ -691,7 +701,7 @@ complex cdotu_( sfac) { real scomp[1], strue[1]; - extern /* Subroutine */ int stest_(integer *, real *, real *, real *, + extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); /* ************************* STEST1 ***************************** */ @@ -733,7 +743,7 @@ real sdiff_(real *sa, real *sb) return ret_val; } /* sdiff_ */ -/* Subroutine */ int ctest_(integer *len, complex *ccomp, complex *ctrue, +/* Subroutine */ int ctest_(integer *len, complex *ccomp, complex *ctrue, complex *csize, real *sfac) { /* System generated locals */ @@ -745,7 +755,7 @@ real sdiff_(real *sa, real *sb) /* Local variables */ integer i__; real scomp[20], ssize[20], strue[20]; - extern /* Subroutine */ int stest_(integer *, real *, real *, real *, + extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); /* **************************** CTEST ***************************** */ diff --git a/blastest/src/cblat2.c b/blastest/src/cblat2.c index 2916a36a4e..08d215aee3 100644 --- a/blastest/src/cblat2.c +++ b/blastest/src/cblat2.c @@ -158,10 +158,15 @@ static logical c_false = FALSE_; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "cblat2"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ - static char snames[6*17] = "CGEMV " "CGBMV " "CHEMV " "CHBMV " "CHPMV " - "CTRMV " "CTBMV " "CTPMV " "CTRSV " "CTBSV " "CTPSV " "CGERC " + static char snames[6*17] = "CGEMV " "CGBMV " "CHEMV " "CHBMV " "CHPMV " + "CTRMV " "CTBMV " "CTPMV " "CTRSV " "CTBSV " "CTPSV " "CGERC " "CGERU " "CHER " "CHPR " "CHER2 " "CHPR2 "; /* Format strings */ @@ -209,10 +214,10 @@ static logical c_false = FALSE_; cllist cl__1; /* Builtin functions */ - integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, - char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), - s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, + char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), + s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); @@ -234,42 +239,42 @@ static logical c_false = FALSE_; integer ninc, nbet, ntra; logical rewi; integer nout; - extern /* Subroutine */ int cchk1_(char *, real *, real *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, integer *, integer *, complex *, integer *, complex *, - integer *, integer *, integer *, integer *, complex *, complex *, - complex *, complex *, complex *, complex *, complex *, complex *, - complex *, complex *, real *, ftnlen), cchk2_(char *, real *, - real *, integer *, integer *, logical *, logical *, logical *, - integer *, integer *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, integer *, integer *, integer *, - complex *, complex *, complex *, complex *, complex *, complex *, - complex *, complex *, complex *, complex *, real *, ftnlen), - cchk3_(char *, real *, real *, integer *, integer *, logical *, - logical *, logical *, integer *, integer *, integer *, integer *, - integer *, integer *, integer *, integer *, complex *, complex *, - complex *, complex *, complex *, complex *, complex *, real *, - complex *, ftnlen), cchk4_(char *, real *, real *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, complex *, integer *, integer *, integer *, integer *, - complex *, complex *, complex *, complex *, complex *, complex *, - complex *, complex *, complex *, complex *, real *, complex *, - ftnlen), cchk5_(char *, real *, real *, integer *, integer *, - logical *, logical *, logical *, integer *, integer *, integer *, - complex *, integer *, integer *, integer *, integer *, complex *, - complex *, complex *, complex *, complex *, complex *, complex *, - complex *, complex *, complex *, real *, complex *, ftnlen), - cchk6_(char *, real *, real *, integer *, integer *, logical *, - logical *, logical *, integer *, integer *, integer *, complex *, - integer *, integer *, integer *, integer *, complex *, complex *, - complex *, complex *, complex *, complex *, complex *, complex *, + extern /* Subroutine */ int cchk1_(char *, real *, real *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, integer *, integer *, complex *, integer *, complex *, + integer *, integer *, integer *, integer *, complex *, complex *, + complex *, complex *, complex *, complex *, complex *, complex *, + complex *, complex *, real *, ftnlen), cchk2_(char *, real *, + real *, integer *, integer *, logical *, logical *, logical *, + integer *, integer *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, integer *, + complex *, complex *, complex *, complex *, complex *, complex *, + complex *, complex *, complex *, complex *, real *, ftnlen), + cchk3_(char *, real *, real *, integer *, integer *, logical *, + logical *, logical *, integer *, integer *, integer *, integer *, + integer *, integer *, integer *, integer *, complex *, complex *, + complex *, complex *, complex *, complex *, complex *, real *, + complex *, ftnlen), cchk4_(char *, real *, real *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, complex *, integer *, integer *, integer *, integer *, + complex *, complex *, complex *, complex *, complex *, complex *, + complex *, complex *, complex *, complex *, real *, complex *, + ftnlen), cchk5_(char *, real *, real *, integer *, integer *, + logical *, logical *, logical *, integer *, integer *, integer *, + complex *, integer *, integer *, integer *, integer *, complex *, + complex *, complex *, complex *, complex *, complex *, complex *, + complex *, complex *, complex *, real *, complex *, ftnlen), + cchk6_(char *, real *, real *, integer *, integer *, logical *, + logical *, logical *, integer *, integer *, integer *, complex *, + integer *, integer *, integer *, integer *, complex *, complex *, + complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchke_(integer * , char *, integer *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * - , integer *, complex *, real *, complex *, real *, real *, + , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; @@ -618,7 +623,7 @@ static logical c_false = FALSE_; goto L80; } for (i__ = 1; i__ <= 17; ++i__) { - if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) + if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } @@ -677,7 +682,7 @@ static logical c_false = FALSE_; /* YY holds the exact result. On exit from CMVCH YT holds */ /* the result computed by CMVCH. */ *(unsigned char *)trans = 'N'; - cmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c__1, &c_b1, y, &c__1, yt, g, + cmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c__1, &c_b1, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lce_(yy, yt, &n); if (! same || err != 0.f) { @@ -690,7 +695,7 @@ static logical c_false = FALSE_; s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; - cmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c_n1, &c_b1, y, &c_n1, yt, g, + cmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c_n1, &c_b1, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lce_(yy, yt, &n); if (! same || err != 0.f) { @@ -751,44 +756,44 @@ static logical c_false = FALSE_; /* Test CGEMV, 01, and CGBMV, 02. */ L140: cchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, - &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, + &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test CHEMV, 03, CHBMV, 04, and CHPMV, 05. */ L150: cchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, - &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, + &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test CTRMV, 06, CTBMV, 07, CTPMV, 08, */ /* CTRSV, 09, CTBSV, 10, and CTPSV, 11. */ L160: cchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test CGERC, 12, CGERU, 13. */ L170: cchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test CHER, 14, and CHPR, 15. */ L180: cchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test CHER2, 16, and CHPR2, 17. */ L190: cchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: @@ -830,15 +835,20 @@ static logical c_false = FALSE_; /* End of CBLAT2. */ - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ /* Subroutine */ int cchk1_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * - nalf, complex *alf, integer *nbet, complex *bet, integer *ninc, + nalf, complex *alf, integer *nbet, complex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, - complex *as, complex *x, complex *xx, complex *xs, complex *y, + complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, ftnlen sname_len) { /* Initialized data */ @@ -867,7 +877,7 @@ static logical c_false = FALSE_; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; @@ -887,26 +897,26 @@ static logical c_false = FALSE_; logical same; integer incx, incy; logical full, tran, null; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, integer *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cgbmv_(char *, integer *, integer *, integer * , integer *, complex *, complex *, integer *, complex *, integer * - , complex *, complex *, integer *, ftnlen), cgemv_(char *, - integer *, integer *, complex *, complex *, integer *, complex *, + , complex *, complex *, integer *, ftnlen), cgemv_(char *, + integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cmvch_(char * , integer *, integer *, complex *, complex *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *, real *, - complex *, real *, real *, logical *, integer *, logical *, + , integer *, complex *, complex *, integer *, complex *, real *, + complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; char trans[1]; logical banded; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; @@ -1089,9 +1099,9 @@ static logical c_false = FALSE_; transl.r = 0.f, transl.i = 0.f; i__7 = abs(incy); i__8 = ml - 1; - cmake_("GE", " ", " ", &c__1, &ml, &y[1], + cmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & - i__8, &reset, &transl, (ftnlen)2, + i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; @@ -1099,7 +1109,7 @@ static logical c_false = FALSE_; /* Save every datum before calling the */ /* subroutine. */ - *(unsigned char *)transs = *(unsigned + *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; @@ -1110,7 +1120,7 @@ static logical c_false = FALSE_; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; - as[i__8].r = aa[i__9].r, as[i__8].i = + as[i__8].r = aa[i__9].r, as[i__8].i = aa[i__9].i; /* L10: */ } @@ -1119,7 +1129,7 @@ static logical c_false = FALSE_; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; - xs[i__8].r = xx[i__9].r, xs[i__8].i = + xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[i__9].i; /* L20: */ } @@ -1129,7 +1139,7 @@ static logical c_false = FALSE_; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; - ys[i__8].r = yy[i__9].r, ys[i__8].i = + ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[i__9].i; /* L30: */ } @@ -1166,7 +1176,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - cgemv_(trans, &m, &n, &alpha, &aa[1], + cgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { @@ -1225,7 +1235,7 @@ static logical c_false = FALSE_; isame[1] = ms == m; isame[2] = ns == n; if (full) { - isame[3] = als.r == alpha.r && als.i + isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lce_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; @@ -1247,13 +1257,13 @@ static logical c_false = FALSE_; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; - isame[5] = als.r == alpha.r && als.i + isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lce_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lce_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; - isame[10] = bls.r == beta.r && bls.i + isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lce_(&ys[1], &yy[1], & @@ -1295,8 +1305,8 @@ static logical c_false = FALSE_; cmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, - &beta, &y[1], &incy, &yt[1], - &g[1], &yy[1], eps, &err, + &beta, &y[1], &incy, &yt[1], + &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); @@ -1401,11 +1411,11 @@ static logical c_false = FALSE_; } /* cchk1_ */ /* Subroutine */ int cchk2_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * - nalf, complex *alf, integer *nbet, complex *bet, integer *ninc, + nalf, complex *alf, integer *nbet, complex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, - complex *as, complex *x, complex *xx, complex *xs, complex *y, + complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, ftnlen sname_len) { /* Initialized data */ @@ -1438,7 +1448,7 @@ static logical c_false = FALSE_; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; @@ -1447,7 +1457,7 @@ static logical c_false = FALSE_; f_rew(alist *); /* Local variables */ - integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, + integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als, bls; @@ -1458,18 +1468,18 @@ static logical c_false = FALSE_; integer incx, incy; logical full, null; char uplo[1]; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, integer *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int chbmv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * - , integer *, ftnlen), chemv_(char *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, complex *, + , integer *, ftnlen), chemv_(char *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * - , integer *, complex *, real *, complex *, real *, real *, + , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int chpmv_(char *, integer *, complex *, complex * @@ -1478,7 +1488,7 @@ static logical c_false = FALSE_; integer incxs, incys; char uplos[1]; logical banded, packed; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; @@ -1643,7 +1653,7 @@ static logical c_false = FALSE_; i__8 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & - reset, &transl, (ftnlen)2, (ftnlen)1, + reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; @@ -1795,13 +1805,13 @@ static logical c_false = FALSE_; unsigned char *)uplos; isame[1] = ns == n; if (full) { - isame[2] = als.r == alpha.r && als.i == + isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lce_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; - isame[7] = bls.r == beta.r && bls.i == + isame[7] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[8] = lce_(&ys[1], &yy[1], &ly); @@ -1814,13 +1824,13 @@ static logical c_false = FALSE_; isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; - isame[3] = als.r == alpha.r && als.i == + isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lce_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lce_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; - isame[8] = bls.r == beta.r && bls.i == + isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lce_(&ys[1], &yy[1], &ly); @@ -1832,12 +1842,12 @@ static logical c_false = FALSE_; } isame[10] = incys == incy; } else if (packed) { - isame[2] = als.r == alpha.r && als.i == + isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&as[1], &aa[1], &laa); isame[4] = lce_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; - isame[6] = bls.r == beta.r && bls.i == + isame[6] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[7] = lce_(&ys[1], &yy[1], &ly); @@ -1875,8 +1885,8 @@ static logical c_false = FALSE_; /* Check the result. */ - cmvch_("N", &n, &n, &alpha, &a[a_offset], - nmax, &x[1], &incx, &beta, &y[1], + cmvch_("N", &n, &n, &alpha, &a[a_offset], + nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); @@ -1987,10 +1997,10 @@ static logical c_false = FALSE_; } /* cchk2_ */ /* Subroutine */ int cchk3_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * - ninc, integer *inc, integer *nmax, integer *incmax, complex *a, - complex *aa, complex *as, complex *x, complex *xx, complex *xs, + ninc, integer *inc, integer *nmax, integer *incmax, complex *a, + complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *xt, real *g, complex *z__, ftnlen sname_len) { /* Initialized data */ @@ -2040,36 +2050,36 @@ static logical c_false = FALSE_; integer incx; logical full, null; char uplo[1]; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, integer *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); char diags[1]; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * - , integer *, complex *, real *, complex *, real *, real *, + , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; - extern /* Subroutine */ int ctbmv_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, ftnlen, - ftnlen, ftnlen), ctbsv_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, ftnlen, + extern /* Subroutine */ int ctbmv_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, ftnlen, + ftnlen, ftnlen), ctbsv_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen); logical reset; integer incxs; char trans[1]; - extern /* Subroutine */ int ctpmv_(char *, char *, char *, integer *, + extern /* Subroutine */ int ctpmv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctrmv_( char *, char *, char *, integer *, complex *, integer *, complex * - , integer *, ftnlen, ftnlen, ftnlen), ctpsv_(char *, char *, char - *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, + , integer *, ftnlen, ftnlen, ftnlen), ctpsv_(char *, char *, char + *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; - extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, - complex *, integer *, complex *, integer *, ftnlen, ftnlen, + extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; @@ -2197,13 +2207,13 @@ static logical c_false = FALSE_; ; for (icd = 1; icd <= 2; ++icd) { - *(unsigned char *)diag = *(unsigned char *)&ichd[icd + *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; - cmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], + cmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2258,7 +2268,7 @@ static logical c_false = FALSE_; /* Call the subroutine. */ - if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) + if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { @@ -2311,7 +2321,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - ctbmv_(uplo, trans, diag, &n, &k, &aa[1], + ctbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { @@ -2392,7 +2402,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - ctbsv_(uplo, trans, diag, &n, &k, &aa[1], + ctbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { @@ -2434,11 +2444,11 @@ static logical c_false = FALSE_; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplo == *(unsigned + isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; - isame[1] = *(unsigned char *)trans == *(unsigned + isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; - isame[2] = *(unsigned char *)diag == *(unsigned + isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { @@ -2508,7 +2518,7 @@ static logical c_false = FALSE_; cmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &x[1], &incx, &c_b1, &z__[ - 1], &incx, &xt[1], &g[1], &xx[1], + 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( @@ -2520,18 +2530,18 @@ static logical c_false = FALSE_; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = (i__ - 1) * abs(incx) + 1; - z__[i__5].r = xx[i__6].r, z__[i__5].i + z__[i__5].r = xx[i__6].r, z__[i__5].i = xx[i__6].i; i__5 = (i__ - 1) * abs(incx) + 1; i__6 = i__; - xx[i__5].r = x[i__6].r, xx[i__5].i = + xx[i__5].r = x[i__6].r, xx[i__5].i = x[i__6].i; /* L50: */ } cmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &z__[1], &incx, &c_b1, &x[ - 1], &incx, &xt[1], &g[1], &xx[1], - eps, &err, fatal, nout, &c_false, + 1], &incx, &xt[1], &g[1], &xx[1], + eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); @@ -2634,10 +2644,10 @@ static logical c_false = FALSE_; } /* cchk3_ */ /* Subroutine */ int cchk4_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * - ninc, integer *inc, integer *nmax, integer *incmax, complex *a, - complex *aa, complex *as, complex *x, complex *xx, complex *xs, + ninc, integer *inc, integer *nmax, integer *incmax, complex *a, + complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { @@ -2681,23 +2691,23 @@ static logical c_false = FALSE_; logical same, conj; integer incx, incy; logical null; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, integer *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen), cgerc_( - integer *, integer *, complex *, complex *, integer *, complex *, + integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * - , integer *, complex *, real *, complex *, real *, real *, - logical *, integer *, logical *, ftnlen), cgeru_(integer *, - integer *, complex *, complex *, integer *, complex *, integer *, + , integer *, complex *, real *, complex *, real *, real *, + logical *, integer *, logical *, ftnlen), cgeru_(integer *, + integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); integer nargs; logical reset; integer incxs, incys; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; @@ -2801,7 +2811,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = m - 1; cmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { i__3 = m / 2; @@ -2840,7 +2850,7 @@ static logical c_false = FALSE_; transl.r = 0.f, transl.i = 0.f; i__5 = m - 1; i__6 = n - 1; - cmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], + cmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2999,9 +3009,9 @@ static logical c_false = FALSE_; r_cnjg(&q__1, w); w[0].r = q__1.r, w[0].i = q__1.i; } - cmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, + cmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b2, &a[j * a_dim1 + 1], & - c__1, &yt[1], &g[1], &aa[(j - 1) * + c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); @@ -3082,10 +3092,10 @@ static logical c_false = FALSE_; } /* cchk4_ */ /* Subroutine */ int cchk5_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * - ninc, integer *inc, integer *nmax, integer *incmax, complex *a, - complex *aa, complex *as, complex *x, complex *xx, complex *xs, + ninc, integer *inc, integer *nmax, integer *incmax, complex *a, + complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { @@ -3130,24 +3140,24 @@ static logical c_false = FALSE_; integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda; extern logical lce_(complex *, complex *, integer *); real err; - extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, + extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, integer *, complex *, integer *, ftnlen); integer ldas; logical same; - extern /* Subroutine */ int chpr_(char *, integer *, real *, complex *, + extern /* Subroutine */ int chpr_(char *, integer *, real *, complex *, integer *, complex *, ftnlen); real rals; integer incx; logical full, null; char uplo[1]; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, integer *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * - , integer *, complex *, real *, complex *, real *, real *, + , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; @@ -3156,7 +3166,7 @@ static logical c_false = FALSE_; char uplos[1]; logical packed; real ralpha; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; @@ -3261,7 +3271,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; @@ -3336,7 +3346,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - cher_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], &lda, + cher_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { @@ -3446,9 +3456,9 @@ static logical c_false = FALSE_; jj = j; lj = n - j + 1; } - cmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, - &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, - &yt[1], &g[1], &aa[ja], eps, &err, fatal, + cmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, + &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, + &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { @@ -3547,10 +3557,10 @@ static logical c_false = FALSE_; } /* cchk5_ */ /* Subroutine */ int cchk6_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * - ninc, integer *inc, integer *nmax, integer *incmax, complex *a, - complex *aa, complex *as, complex *x, complex *xx, complex *xs, + ninc, integer *inc, integer *nmax, integer *incmax, complex *a, + complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { @@ -3580,7 +3590,7 @@ static logical c_false = FALSE_; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1, q__2, q__3; alist al__1; @@ -3603,17 +3613,17 @@ static logical c_false = FALSE_; logical full, null; char uplo[1]; extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * - , integer *, complex *, integer *, complex *, integer *, ftnlen), - chpr2_(char *, integer *, complex *, complex *, integer *, - complex *, integer *, complex *, ftnlen), cmake_(char *, char *, - char *, integer *, integer *, complex *, integer *, complex *, - integer *, integer *, integer *, logical *, complex *, ftnlen, + , integer *, complex *, integer *, complex *, integer *, ftnlen), + chpr2_(char *, integer *, complex *, complex *, integer *, + complex *, integer *, complex *, ftnlen), cmake_(char *, char *, + char *, integer *, integer *, complex *, integer *, complex *, + integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * - , integer *, complex *, real *, complex *, real *, real *, + , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; @@ -3621,7 +3631,7 @@ static logical c_false = FALSE_; logical upper; char uplos[1]; logical packed; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; @@ -3728,7 +3738,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; @@ -3768,7 +3778,7 @@ static logical c_false = FALSE_; transl.r = 0.f, transl.i = 0.f; i__5 = n - 1; i__6 = n - 1; - cmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], + cmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -3956,14 +3966,14 @@ static logical c_false = FALSE_; i__5 = n; for (j = 1; j <= i__5; ++j) { r_cnjg(&q__2, &z__[j + (z_dim1 << 1)]); - q__1.r = alpha.r * q__2.r - alpha.i * q__2.i, - q__1.i = alpha.r * q__2.i + alpha.i * + q__1.r = alpha.r * q__2.r - alpha.i * q__2.i, + q__1.i = alpha.r * q__2.i + alpha.i * q__2.r; w[0].r = q__1.r, w[0].i = q__1.i; r_cnjg(&q__2, &alpha); r_cnjg(&q__3, &z__[j + z_dim1]); - q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, - q__1.i = q__2.r * q__3.i + q__2.i * + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, + q__1.i = q__2.r * q__3.i + q__2.i * q__3.r; w[1].r = q__1.r, w[1].i = q__1.i; if (upper) { @@ -3973,8 +3983,8 @@ static logical c_false = FALSE_; jj = j; lj = n - j + 1; } - cmvch_("N", &lj, &c__2, &c_b2, &z__[jj + - z_dim1], nmax, w, &c__1, &c_b2, &a[jj + cmvch_("N", &lj, &c__2, &c_b2, &z__[jj + + z_dim1], nmax, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], & aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); @@ -4079,7 +4089,7 @@ static logical c_false = FALSE_; } /* cchk6_ */ -/* Subroutine */ int cchke_(integer *isnum, char *srnamt, integer *nout, +/* Subroutine */ int cchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ @@ -4093,40 +4103,40 @@ static logical c_false = FALSE_; /* Local variables */ complex a[1] /* was [1][1] */, x[1], y[1], beta; - extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, + extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, integer *, complex *, integer *, ftnlen), chpr_(char *, integer *, - real *, complex *, integer *, complex *, ftnlen), cher2_(char *, - integer *, complex *, complex *, integer *, complex *, integer *, + real *, complex *, integer *, complex *, ftnlen), cher2_(char *, + integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, ftnlen), chpr2_(char *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, ftnlen), - cgerc_(integer *, integer *, complex *, complex *, integer *, + , complex *, integer *, complex *, integer *, complex *, ftnlen), + cgerc_(integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); complex alpha; extern /* Subroutine */ int cgbmv_(char *, integer *, integer *, integer * , integer *, complex *, complex *, integer *, complex *, integer * - , complex *, complex *, integer *, ftnlen), chbmv_(char *, - integer *, integer *, complex *, complex *, integer *, complex *, + , complex *, complex *, integer *, ftnlen), chbmv_(char *, + integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cgemv_(char * , integer *, integer *, complex *, complex *, integer *, complex * , integer *, complex *, complex *, integer *, ftnlen), chemv_( - char *, integer *, complex *, complex *, integer *, complex *, + char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cgeru_( - integer *, integer *, complex *, complex *, integer *, complex *, - integer *, complex *, integer *), ctbmv_(char *, char *, char *, - integer *, integer *, complex *, integer *, complex *, integer *, - ftnlen, ftnlen, ftnlen), chpmv_(char *, integer *, complex *, - complex *, complex *, integer *, complex *, complex *, integer *, - ftnlen), ctbsv_(char *, char *, char *, integer *, integer *, - complex *, integer *, complex *, integer *, ftnlen, ftnlen, - ftnlen), ctpmv_(char *, char *, char *, integer *, complex *, - complex *, integer *, ftnlen, ftnlen, ftnlen), ctrmv_(char *, - char *, char *, integer *, complex *, integer *, complex *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *, complex *, integer *), ctbmv_(char *, char *, char *, + integer *, integer *, complex *, integer *, complex *, integer *, + ftnlen, ftnlen, ftnlen), chpmv_(char *, integer *, complex *, + complex *, complex *, integer *, complex *, complex *, integer *, + ftnlen), ctbsv_(char *, char *, char *, integer *, integer *, + complex *, integer *, complex *, integer *, ftnlen, ftnlen, + ftnlen), ctpmv_(char *, char *, char *, integer *, complex *, + complex *, integer *, ftnlen, ftnlen, ftnlen), ctrmv_(char *, + char *, char *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctpsv_(char *, char *, char *, - integer *, complex *, complex *, integer *, ftnlen, ftnlen, - ftnlen), ctrsv_(char *, char *, char *, integer *, complex *, + integer *, complex *, complex *, integer *, ftnlen, ftnlen, + ftnlen), ctrsv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen); real ralpha; - extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical + extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ @@ -4655,9 +4665,9 @@ static logical c_false = FALSE_; } /* cchke_ */ -/* Subroutine */ int cmake_(char *type__, char *uplo, char *diag, integer *m, - integer *n, complex *a, integer *nmax, complex *aa, integer *lda, - integer *kl, integer *ku, logical *reset, complex *transl, ftnlen +/* Subroutine */ int cmake_(char *type__, char *uplo, char *diag, integer *m, + integer *n, complex *a, integer *nmax, complex *aa, integer *lda, + integer *kl, integer *ku, logical *reset, complex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ @@ -4718,7 +4728,7 @@ static logical c_false = FALSE_; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { - if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) + if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { i__3 = i__ + j * a_dim1; cbeg_(&q__2, reset); @@ -4953,8 +4963,8 @@ static logical c_false = FALSE_; /* Subroutine */ int cmvch_(char *trans, integer *m, integer *n, complex * alpha, complex *a, integer *nmax, complex *x, integer *incx, complex * - beta, complex *y, integer *incy, complex *yt, real *g, complex *yy, - real *eps, real *err, logical *fatal, integer *nout, logical *mv, + beta, complex *y, integer *incy, complex *yt, real *g, complex *yy, + real *eps, real *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ @@ -5057,15 +5067,15 @@ static logical c_false = FALSE_; i__4 = iy; i__5 = j + i__ * a_dim1; i__6 = jx; - q__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, + q__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, q__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6] .r; q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i; yt[i__3].r = q__1.r, yt[i__3].i = q__1.i; i__3 = j + i__ * a_dim1; i__4 = jx; - g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j - + i__ * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, + g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j + + i__ * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4))); jx += incxl; /* L10: */ @@ -5077,14 +5087,14 @@ static logical c_false = FALSE_; i__4 = iy; r_cnjg(&q__3, &a[j + i__ * a_dim1]); i__5 = jx; - q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, q__2.i = + q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, q__2.i = q__3.r * x[i__5].i + q__3.i * x[i__5].r; q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i; yt[i__3].r = q__1.r, yt[i__3].i = q__1.i; i__3 = j + i__ * a_dim1; i__4 = jx; - g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j - + i__ * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, + g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j + + i__ * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4))); jx += incxl; /* L20: */ @@ -5096,7 +5106,7 @@ static logical c_false = FALSE_; i__4 = iy; i__5 = i__ + j * a_dim1; i__6 = jx; - q__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, + q__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, q__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6] .r; q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i; @@ -5104,7 +5114,7 @@ static logical c_false = FALSE_; i__3 = i__ + j * a_dim1; i__4 = jx; g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[ - i__ + j * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, + i__ + j * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4))); jx += incxl; /* L30: */ @@ -5112,7 +5122,7 @@ static logical c_false = FALSE_; } i__2 = iy; i__3 = iy; - q__2.r = alpha->r * yt[i__3].r - alpha->i * yt[i__3].i, q__2.i = + q__2.r = alpha->r * yt[i__3].r - alpha->i * yt[i__3].i, q__2.i = alpha->r * yt[i__3].i + alpha->i * yt[i__3].r; i__4 = iy; q__3.r = beta->r * y[i__4].r - beta->i * y[i__4].i, q__3.i = beta->r * @@ -5121,7 +5131,7 @@ static logical c_false = FALSE_; yt[i__2].r = q__1.r, yt[i__2].i = q__1.i; i__2 = iy; g[iy] = ((r__1 = alpha->r, abs(r__1)) + (r__2 = r_imag(alpha), abs( - r__2))) * g[iy] + ((r__3 = beta->r, abs(r__3)) + (r__4 = + r__2))) * g[iy] + ((r__3 = beta->r, abs(r__3)) + (r__4 = r_imag(beta), abs(r__4))) * ((r__5 = y[i__2].r, abs(r__5)) + ( r__6 = r_imag(&y[iy]), abs(r__6))); iy += incyl; @@ -5410,7 +5420,7 @@ real sdiff_(real *x, real *y) } /* sdiff_ */ -/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, +/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ diff --git a/blastest/src/cblat3.c b/blastest/src/cblat3.c index a5b870f0f3..e3d5e32a3c 100644 --- a/blastest/src/cblat3.c +++ b/blastest/src/cblat3.c @@ -140,9 +140,14 @@ static integer c_n1 = -1; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "cblat3"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ - static char snames[6*9] = "CGEMM " "CHEMM " "CSYMM " "CTRMM " "CTRSM " + static char snames[6*9] = "CGEMM " "CHEMM " "CSYMM " "CTRMM " "CTRSM " "CHERK " "CSYRK " "CHER2K" "CSYR2K"; /* Format strings */ @@ -186,10 +191,10 @@ static integer c_n1 = -1; cllist cl__1; /* Builtin functions */ - integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, - char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), - s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, + char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), + s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); @@ -209,34 +214,34 @@ static integer c_n1 = -1; integer nbet, ntra; logical rewi; integer nout; - extern /* Subroutine */ int cchk1_(char *, real *, real *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, complex *, integer *, complex *, integer *, complex *, - complex *, complex *, complex *, complex *, complex *, complex *, - complex *, complex *, complex *, real *, ftnlen), cchk2_(char *, - real *, real *, integer *, integer *, logical *, logical *, - logical *, integer *, integer *, integer *, complex *, integer *, - complex *, integer *, complex *, complex *, complex *, complex *, - complex *, complex *, complex *, complex *, complex *, complex *, - real *, ftnlen), cchk3_(char *, real *, real *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, complex *, - complex *, complex *, complex *, complex *, real *, complex *, - ftnlen), cchk4_(char *, real *, real *, integer *, integer *, - logical *, logical *, logical *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, complex *, complex *, - complex *, complex *, complex *, complex *, complex *, complex *, - complex *, complex *, real *, ftnlen), cchk5_(char *, real *, - real *, integer *, integer *, logical *, logical *, logical *, - integer *, integer *, integer *, complex *, integer *, complex *, - integer *, complex *, complex *, complex *, complex *, complex *, - complex *, complex *, complex *, complex *, real *, complex *, + extern /* Subroutine */ int cchk1_(char *, real *, real *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + complex *, complex *, complex *, complex *, complex *, complex *, + complex *, complex *, complex *, real *, ftnlen), cchk2_(char *, + real *, real *, integer *, integer *, logical *, logical *, + logical *, integer *, integer *, integer *, complex *, integer *, + complex *, integer *, complex *, complex *, complex *, complex *, + complex *, complex *, complex *, complex *, complex *, complex *, + real *, ftnlen), cchk3_(char *, real *, real *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, complex *, + complex *, complex *, complex *, complex *, real *, complex *, + ftnlen), cchk4_(char *, real *, real *, integer *, integer *, + logical *, logical *, logical *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, complex *, + complex *, complex *, complex *, complex *, complex *, complex *, + complex *, complex *, real *, ftnlen), cchk5_(char *, real *, + real *, integer *, integer *, logical *, logical *, logical *, + integer *, integer *, integer *, complex *, integer *, complex *, + integer *, complex *, complex *, complex *, complex *, complex *, + complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchke_(integer *, char *, integer *, ftnlen); logical fatal; - extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *, complex *, real *, complex *, - integer *, real *, real *, logical *, integer *, logical *, + extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, real *, complex *, + integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); logical trace; integer nidim; @@ -508,7 +513,7 @@ static integer c_n1 = -1; goto L60; } for (i__ = 1; i__ <= 9; ++i__) { - if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) + if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } @@ -571,7 +576,7 @@ static integer c_n1 = -1; *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & - c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, + c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { @@ -586,7 +591,7 @@ static integer c_n1 = -1; } *(unsigned char *)transb = 'C'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & - c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, + c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { @@ -619,7 +624,7 @@ static integer c_n1 = -1; *(unsigned char *)transa = 'C'; *(unsigned char *)transb = 'N'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & - c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, + c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { @@ -634,7 +639,7 @@ static integer c_n1 = -1; } *(unsigned char *)transb = 'C'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & - c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, + c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { @@ -688,34 +693,34 @@ static integer c_n1 = -1; /* Test CGEMM, 01. */ L140: cchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CHEMM, 02, CSYMM, 03. */ L150: cchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CTRMM, 04, CTRSM, 05. */ L160: cchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test CHERK, 06, CSYRK, 07. */ L170: cchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CHER2K, 08, CSYR2K, 09. */ L180: cchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; @@ -759,14 +764,19 @@ static integer c_n1 = -1; /* End of CBLAT3. */ - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ /* Subroutine */ int cchk1_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex * - as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, + as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, complex *ct, real *g, ftnlen sname_len) { /* Initialized data */ @@ -791,7 +801,7 @@ static integer c_n1 = -1; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; @@ -800,7 +810,7 @@ static integer c_n1 = -1; f_rew(alist *); /* Local variables */ - integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, + integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; extern logical lce_(complex *, complex *, integer *); complex als, bls; @@ -808,21 +818,21 @@ static integer c_n1 = -1; complex beta; integer ldas, ldbs, ldcs; logical same, null; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, logical *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *, ftnlen, ftnlen), cmmch_(char *, - char *, integer *, integer *, integer *, complex *, complex *, - integer *, complex *, integer *, complex *, complex *, integer *, + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *, ftnlen, ftnlen), cmmch_(char *, + char *, integer *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical * , integer *, logical *, ftnlen, ftnlen); logical isame[13], trana, tranb; integer nargs; logical reset; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); char tranas[1], tranbs[1], transa[1], transb[1]; real errmax; @@ -915,7 +925,7 @@ static integer c_n1 = -1; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; - trana = *(unsigned char *)transa == 'T' || *(unsigned + trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { @@ -943,9 +953,9 @@ static integer c_n1 = -1; ftnlen)1); for (icb = 1; icb <= 3; ++icb) { - *(unsigned char *)transb = *(unsigned char *)&ich[icb + *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; - tranb = *(unsigned char *)transb == 'T' || *(unsigned + tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { @@ -1086,13 +1096,13 @@ static integer c_n1 = -1; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; - isame[5] = als.r == alpha.r && als.i == + isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lce_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lce_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; - isame[10] = bls.r == beta.r && bls.i == + isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lce_(&cs[1], &cc[1], &lcc); @@ -1130,9 +1140,9 @@ static integer c_n1 = -1; cmmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], - nmax, &beta, &c__[c_offset], + nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, - eps, &err, fatal, nout, &c_true, + eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ @@ -1214,10 +1224,10 @@ static integer c_n1 = -1; } /* cchk1_ */ /* Subroutine */ int cchk2_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex * - as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, + as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, complex *ct, real *g, ftnlen sname_len) { /* Initialized data */ @@ -1243,7 +1253,7 @@ static integer c_n1 = -1; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; alist al__1; @@ -1252,7 +1262,7 @@ static integer c_n1 = -1; integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ - integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, + integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc; extern logical lce_(complex *, complex *, integer *); integer ics; @@ -1265,26 +1275,26 @@ static integer c_n1 = -1; char side[1]; logical conj, left, null; char uplo[1]; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, logical *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; - extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *, complex *, real *, complex *, - integer *, real *, real *, logical *, integer *, logical *, - ftnlen, ftnlen), chemm_(char *, char *, integer *, integer *, - complex *, complex *, integer *, complex *, integer *, complex *, + extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, real *, complex *, + integer *, real *, real *, logical *, integer *, logical *, + ftnlen, ftnlen), chemm_(char *, char *, integer *, integer *, + complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); logical isame[13]; char sides[1]; integer nargs; logical reset; - extern /* Subroutine */ int csymm_(char *, char *, integer *, integer *, - complex *, complex *, integer *, complex *, integer *, complex *, + extern /* Subroutine */ int csymm_(char *, char *, integer *, integer *, + complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); char uplos[1]; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; @@ -1426,7 +1436,7 @@ static integer c_n1 = -1; /* Generate the matrix C. */ - cmake_("GE", " ", " ", &m, &n, &c__[c_offset], + cmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -1522,9 +1532,9 @@ static integer c_n1 = -1; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)sides == *(unsigned + isame[0] = *(unsigned char *)sides == *(unsigned char *)side; - isame[1] = *(unsigned char *)uplos == *(unsigned + isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; @@ -1569,14 +1579,14 @@ static integer c_n1 = -1; if (left) { cmmch_("N", "N", &m, &n, &m, &alpha, &a[ - a_offset], nmax, &b[b_offset], + a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { cmmch_("N", "N", &m, &n, &n, &alpha, &b[ - b_offset], nmax, &a[a_offset], + b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( @@ -1657,9 +1667,9 @@ static integer c_n1 = -1; } /* cchk2_ */ /* Subroutine */ int cchk3_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * - nmax, complex *a, complex *aa, complex *as, complex *b, complex *bb, + nmax, complex *a, complex *aa, complex *as, complex *b, complex *bb, complex *bs, complex *ct, real *g, complex *c__, ftnlen sname_len) { /* Initialized data */ @@ -1686,7 +1696,7 @@ static integer c_n1 = -1; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1; alist al__1; @@ -1708,27 +1718,27 @@ static integer c_n1 = -1; char side[1]; logical left, null; char uplo[1]; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, logical *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; char diags[1]; - extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *, complex *, real *, complex *, - integer *, real *, real *, logical *, integer *, logical *, + extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, real *, complex *, + integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; char sides[1]; integer nargs; logical reset; - extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ctrsm_(char *, char *, - char *, char *, integer *, integer *, complex *, complex *, + char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char uplos[1]; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); char tranas[1], transa[1]; real errmax; @@ -1867,7 +1877,7 @@ static integer c_n1 = -1; /* Generate the matrix B. */ - cmake_("GE", " ", " ", &m, &n, &b[b_offset], + cmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -1939,7 +1949,7 @@ static integer c_n1 = -1; } ctrmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, - (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { @@ -1972,7 +1982,7 @@ static integer c_n1 = -1; } ctrsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, - (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } @@ -1998,7 +2008,7 @@ static integer c_n1 = -1; unsigned char *)diag; isame[4] = ms == m; isame[5] = ns == n; - isame[6] = als.r == alpha.r && als.i == + isame[6] = als.r == alpha.r && als.i == alpha.i; isame[7] = lce_(&as[1], &aa[1], &laa); isame[8] = ldas == lda; @@ -2042,18 +2052,18 @@ static integer c_n1 = -1; cmmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & - c_b1, &c__[c_offset], + c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { cmmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & - c_b1, &c__[c_offset], + c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } @@ -2066,14 +2076,14 @@ static integer c_n1 = -1; i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; - for (i__ = 1; i__ <= i__5; ++i__) + for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + j * c_dim1; i__7 = i__ + (j - 1) * ldb; c__[i__6].r = bb[i__7].r, c__[i__6].i = bb[i__7].i; i__6 = i__ + (j - 1) * ldb; i__7 = i__ + j * b_dim1; - q__1.r = alpha.r * b[i__7].r - alpha.i * b[i__7].i, + q__1.r = alpha.r * b[i__7].r - alpha.i * b[i__7].i, q__1.i = alpha.r * b[i__7].i + alpha.i * b[ i__7].r; bb[i__6].r = q__1.r, bb[i__6].i = q__1.i; @@ -2084,20 +2094,20 @@ static integer c_n1 = -1; if (left) { cmmch_(transa, "N", &m, &n, &m, & - c_b2, &a[a_offset], nmax, + c_b2, &a[a_offset], nmax, &c__[c_offset], nmax, & - c_b1, &b[b_offset], nmax, + c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & - ldb, eps, &err, fatal, + ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { cmmch_("N", transa, &m, &n, &n, & - c_b2, &c__[c_offset], - nmax, &a[a_offset], nmax, + c_b2, &c__[c_offset], + nmax, &a[a_offset], nmax, &c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & - ldb, eps, &err, fatal, + ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } @@ -2179,10 +2189,10 @@ static integer c_n1 = -1; } /* cchk3_ */ /* Subroutine */ int cchk4_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex * - as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, + as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, complex *ct, real *g, ftnlen sname_len) { /* Initialized data */ @@ -2213,7 +2223,7 @@ static integer c_n1 = -1; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1; alist al__1; @@ -2236,16 +2246,16 @@ static integer c_n1 = -1; real rals; logical tran, null; char uplo[1]; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, logical *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; - extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *, complex *, real *, complex *, - integer *, real *, real *, logical *, integer *, logical *, - ftnlen, ftnlen), cherk_(char *, char *, integer *, integer *, - real *, complex *, integer *, real *, complex *, integer *, + extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, real *, complex *, + integer *, real *, real *, logical *, integer *, logical *, + ftnlen, ftnlen), cherk_(char *, char *, integer *, integer *, + real *, complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen); real rbeta; logical isame[13]; @@ -2254,12 +2264,12 @@ static integer c_n1 = -1; logical reset; char trans[1]; logical upper; - extern /* Subroutine */ int csyrk_(char *, char *, integer *, integer *, - complex *, complex *, integer *, complex *, complex *, integer *, + extern /* Subroutine */ int csyrk_(char *, char *, integer *, integer *, + complex *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); char uplos[1]; real ralpha; - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; char transs[1], transt[1]; @@ -2402,7 +2412,7 @@ static integer c_n1 = -1; } null = n <= 0; if (conj) { - null = null || (k <= 0 || ralpha == 0.f) && + null = null || (k <= 0 || ralpha == 0.f) && rbeta == 1.f; } @@ -2481,7 +2491,7 @@ static integer c_n1 = -1; f_rew(&al__1); } cherk_(uplo, trans, &n, &k, &ralpha, &aa[1], & - lda, &rbeta, &cc[1], &ldc, (ftnlen)1, + lda, &rbeta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { @@ -2528,16 +2538,16 @@ static integer c_n1 = -1; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplos == *(unsigned + isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; - isame[1] = *(unsigned char *)transs == *(unsigned + isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; if (conj) { isame[4] = rals == ralpha; } else { - isame[4] = als.r == alpha.r && als.i == + isame[4] = als.r == alpha.r && als.i == alpha.i; } isame[5] = lce_(&as[1], &aa[1], &laa); @@ -2545,7 +2555,7 @@ static integer c_n1 = -1; if (conj) { isame[7] = rbets == rbeta; } else { - isame[7] = bets.r == beta.r && bets.i == + isame[7] = bets.r == beta.r && bets.i == beta.i; } if (null) { @@ -2599,19 +2609,19 @@ static integer c_n1 = -1; } if (tran) { cmmch_(transt, "N", &lj, &c__1, &k, & - alpha, &a[jj * a_dim1 + 1], - nmax, &a[j * a_dim1 + 1], - nmax, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + alpha, &a[jj * a_dim1 + 1], + nmax, &a[j * a_dim1 + 1], + nmax, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { cmmch_("N", transt, &lj, &c__1, &k, & - alpha, &a[jj + a_dim1], nmax, + alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & - ct[1], &g[1], &cc[jc], &ldc, + ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } @@ -2720,10 +2730,10 @@ static integer c_n1 = -1; } /* cchk4_ */ /* Subroutine */ int cchk5_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nbet, complex *bet, integer *nmax, complex *ab, complex *aa, complex * - as, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, + as, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, complex *ct, real *g, complex *w, ftnlen sname_len) { /* Initialized data */ @@ -2778,14 +2788,14 @@ static integer c_n1 = -1; complex bets; logical tran, null; char uplo[1]; - extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, integer *, logical *, + extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; - extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *, complex *, real *, complex *, - integer *, real *, real *, logical *, integer *, logical *, + extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, real *, complex *, + integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); real rbeta; logical isame[13]; @@ -2795,12 +2805,12 @@ static integer c_n1 = -1; char trans[1]; logical upper; char uplos[1]; - extern /* Subroutine */ int cher2k_(char *, char *, integer *, integer *, - complex *, complex *, integer *, complex *, integer *, real *, - complex *, integer *, ftnlen, ftnlen), csyr2k_(char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, + extern /* Subroutine */ int cher2k_(char *, char *, integer *, integer *, + complex *, complex *, integer *, complex *, integer *, real *, + complex *, integer *, ftnlen, ftnlen), csyr2k_(char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); - extern logical lceres_(char *, char *, integer *, integer *, complex *, + extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; char transs[1], transt[1]; @@ -2957,7 +2967,7 @@ static integer c_n1 = -1; } null = n <= 0; if (conj) { - null = null || (k <= 0 || alpha.r == 0.f && + null = null || (k <= 0 || alpha.r == 0.f && alpha.i == 0.f) && rbeta == 1.f; } @@ -3092,9 +3102,9 @@ static integer c_n1 = -1; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplos == *(unsigned + isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; - isame[1] = *(unsigned char *)transs == *(unsigned + isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; @@ -3106,7 +3116,7 @@ static integer c_n1 = -1; if (conj) { isame[9] = rbets == rbeta; } else { - isame[9] = bets.r == beta.r && bets.i == + isame[9] = bets.r == beta.r && bets.i == beta.i; } if (null) { @@ -3162,20 +3172,20 @@ static integer c_n1 = -1; i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; - i__8 = (j - 1 << 1) * *nmax + k + + i__8 = (j - 1 << 1) * *nmax + k + i__; - q__1.r = alpha.r * ab[i__8].r - - alpha.i * ab[i__8].i, + q__1.r = alpha.r * ab[i__8].r - + alpha.i * ab[i__8].i, q__1.i = alpha.r * ab[ i__8].i + alpha.i * ab[ i__8].r; - w[i__7].r = q__1.r, w[i__7].i = + w[i__7].r = q__1.r, w[i__7].i = q__1.i; if (conj) { i__7 = k + i__; r_cnjg(&q__2, &alpha); i__8 = (j - 1 << 1) * *nmax + i__; - q__1.r = q__2.r * ab[i__8].r - q__2.i * ab[i__8].i, + q__1.r = q__2.r * ab[i__8].r - q__2.i * ab[i__8].i, q__1.i = q__2.r * ab[i__8].i + q__2.i * ab[ i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; @@ -3183,7 +3193,7 @@ static integer c_n1 = -1; i__7 = k + i__; i__8 = (j - 1 << 1) * *nmax + i__; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] - .i, q__1.i = alpha.r * ab[i__8].i + alpha.i + .i, q__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; } @@ -3194,9 +3204,9 @@ static integer c_n1 = -1; i__8 = *nmax << 1; cmmch_(transt, "N", &lj, &c__1, &i__6, &c_b2, &ab[jjab], &i__7, &w[ - 1], &i__8, &beta, &c__[jj + j + 1], &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1] - , &cc[jc], &ldc, eps, &err, + , &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { @@ -3205,14 +3215,14 @@ static integer c_n1 = -1; if (conj) { i__7 = i__; r_cnjg(&q__2, &ab[(k + i__ - 1) * *nmax + j]); - q__1.r = alpha.r * q__2.r - alpha.i * q__2.i, - q__1.i = alpha.r * q__2.i + alpha.i * + q__1.r = alpha.r * q__2.r - alpha.i * q__2.i, + q__1.i = alpha.r * q__2.i + alpha.i * q__2.r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; q__2.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] - .i, q__2.i = alpha.r * ab[i__8].i + alpha.i + .i, q__2.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; r_cnjg(&q__1, &q__2); w[i__7].r = q__1.r, w[i__7].i = q__1.i; @@ -3220,13 +3230,13 @@ static integer c_n1 = -1; i__7 = i__; i__8 = (k + i__ - 1) * *nmax + j; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] - .i, q__1.i = alpha.r * ab[i__8].i + alpha.i + .i, q__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] - .i, q__1.i = alpha.r * ab[i__8].i + alpha.i + .i, q__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; } @@ -3236,9 +3246,9 @@ static integer c_n1 = -1; i__7 = *nmax << 1; cmmch_("N", "N", &lj, &c__1, &i__6, & c_b2, &ab[jj], nmax, &w[1], & - i__7, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + i__7, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } @@ -3351,7 +3361,7 @@ static integer c_n1 = -1; } /* cchk5_ */ -/* Subroutine */ int cchke_(integer *isnum, char *srnamt, integer *nout, +/* Subroutine */ int cchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ @@ -3364,34 +3374,34 @@ static integer c_n1 = -1; integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ - complex a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] + complex a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *, ftnlen, ftnlen), chemm_(char *, - char *, integer *, integer *, complex *, complex *, integer *, - complex *, integer *, complex *, complex *, integer *, ftnlen, - ftnlen), cherk_(char *, char *, integer *, integer *, real *, - complex *, integer *, real *, complex *, integer *, ftnlen, + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *, ftnlen, ftnlen), chemm_(char *, + char *, integer *, integer *, complex *, complex *, integer *, + complex *, integer *, complex *, complex *, integer *, ftnlen, + ftnlen), cherk_(char *, char *, integer *, integer *, real *, + complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen); real rbeta; - extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), csymm_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, - integer *, complex *, complex *, integer *, ftnlen, ftnlen), - ctrsm_(char *, char *, char *, char *, integer *, integer *, - complex *, complex *, integer *, complex *, integer *, ftnlen, - ftnlen, ftnlen, ftnlen), csyrk_(char *, char *, integer *, - integer *, complex *, complex *, integer *, complex *, complex *, - integer *, ftnlen, ftnlen), cher2k_(char *, char *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - real *, complex *, integer *, ftnlen, ftnlen), csyr2k_(char *, - char *, integer *, integer *, complex *, complex *, integer *, - complex *, integer *, complex *, complex *, integer *, ftnlen, + integer *, complex *, complex *, integer *, ftnlen, ftnlen), + ctrsm_(char *, char *, char *, char *, integer *, integer *, + complex *, complex *, integer *, complex *, integer *, ftnlen, + ftnlen, ftnlen, ftnlen), csyrk_(char *, char *, integer *, + integer *, complex *, complex *, integer *, complex *, complex *, + integer *, ftnlen, ftnlen), cher2k_(char *, char *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + real *, complex *, integer *, ftnlen, ftnlen), csyr2k_(char *, + char *, integer *, integer *, complex *, complex *, integer *, + complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real ralpha; - extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical + extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ @@ -3451,302 +3461,302 @@ static integer c_n1 = -1; } L10: infoc_1.infot = 1; - cgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; - cgemm_("/", "C", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("/", "C", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; - cgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - cgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - cgemm_("C", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - cgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("N", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("C", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("C", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("C", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("T", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - cgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("N", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("C", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("C", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("C", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("T", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - cgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("N", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("C", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("C", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("C", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("T", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - cgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, + cgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("C", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("C", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, + cgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("T", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - cgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, + cgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, + cgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("N", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("C", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("T", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("C", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - cgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + cgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + cgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + cgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("C", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("C", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("C", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("C", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("T", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - cgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + cgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); @@ -4926,9 +4936,9 @@ static integer c_n1 = -1; } /* cchke_ */ -/* Subroutine */ int cmake_(char *type__, char *uplo, char *diag, integer *m, - integer *n, complex *a, integer *nmax, complex *aa, integer *lda, - logical *reset, complex *transl, ftnlen type_len, ftnlen uplo_len, +/* Subroutine */ int cmake_(char *type__, char *uplo, char *diag, integer *m, + integer *n, complex *a, integer *nmax, complex *aa, integer *lda, + logical *reset, complex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ @@ -5114,10 +5124,10 @@ static integer c_n1 = -1; } /* cmake_ */ /* Subroutine */ int cmmch_(char *transa, char *transb, integer *m, integer * - n, integer *kk, complex *alpha, complex *a, integer *lda, complex *b, - integer *ldb, complex *beta, complex *c__, integer *ldc, complex *ct, + n, integer *kk, complex *alpha, complex *a, integer *lda, complex *b, + integer *ldb, complex *beta, complex *c__, integer *ldc, complex *ct, real *g, complex *cc, integer *ldcc, real *eps, real *err, logical * - fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen + fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ @@ -5131,7 +5141,7 @@ static integer c_n1 = -1; " \002,i3)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; real r__1, r__2, r__3, r__4, r__5, r__6; complex q__1, q__2, q__3, q__4; @@ -5190,9 +5200,9 @@ static integer c_n1 = -1; cc -= cc_offset; /* Function Body */ - trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == + trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; - tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == + tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; ctrana = *(unsigned char *)transa == 'C'; ctranb = *(unsigned char *)transb == 'C'; @@ -5220,17 +5230,17 @@ static integer c_n1 = -1; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = k + j * b_dim1; - q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, + q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, q__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[ i__7].r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag( &a[i__ + k * a_dim1]), abs(r__2))) * ((r__3 = b[ - i__5].r, abs(r__3)) + (r__4 = r_imag(&b[k + j * + i__5].r, abs(r__3)) + (r__4 = r_imag(&b[k + j * b_dim1]), abs(r__4))); /* L20: */ } @@ -5246,15 +5256,15 @@ static integer c_n1 = -1; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); i__6 = k + j * b_dim1; - q__2.r = q__3.r * b[i__6].r - q__3.i * b[i__6].i, + q__2.r = q__3.r * b[i__6].r - q__3.i * b[i__6].i, q__2.i = q__3.r * b[i__6].i + q__3.i * b[i__6] .r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; - g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = + g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[k + j * b_dim1]), abs(r__4))); @@ -5274,12 +5284,12 @@ static integer c_n1 = -1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, q__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; - g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = + g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[k + j * b_dim1]), abs(r__4))); @@ -5298,15 +5308,15 @@ static integer c_n1 = -1; i__5 = i__; i__6 = i__ + k * a_dim1; r_cnjg(&q__3, &b[j + k * b_dim1]); - q__2.r = a[i__6].r * q__3.r - a[i__6].i * q__3.i, - q__2.i = a[i__6].r * q__3.i + a[i__6].i * + q__2.r = a[i__6].r * q__3.r - a[i__6].i * q__3.i, + q__2.i = a[i__6].r * q__3.i + a[i__6].i * q__3.r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; - g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = + g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[j + k * b_dim1]), abs(r__4))); @@ -5326,12 +5336,12 @@ static integer c_n1 = -1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, q__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; - g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = + g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[j + k * b_dim1]), abs(r__4))); @@ -5351,17 +5361,17 @@ static integer c_n1 = -1; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); r_cnjg(&q__4, &b[j + k * b_dim1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, - q__2.i = q__3.r * q__4.i + q__3.i * + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, + q__2.i = q__3.r * q__4.i + q__3.i * q__4.r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) - * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 + * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L120: */ } @@ -5376,17 +5386,17 @@ static integer c_n1 = -1; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); i__6 = j + k * b_dim1; - q__2.r = q__3.r * b[i__6].r - q__3.i * b[i__6].i, + q__2.r = q__3.r * b[i__6].r - q__3.i * b[i__6].i, q__2.i = q__3.r * b[i__6].i + q__3.i * b[ i__6].r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) - * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 + * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L140: */ } @@ -5403,17 +5413,17 @@ static integer c_n1 = -1; i__5 = i__; i__6 = k + i__ * a_dim1; r_cnjg(&q__3, &b[j + k * b_dim1]); - q__2.r = a[i__6].r * q__3.r - a[i__6].i * q__3.i, - q__2.i = a[i__6].r * q__3.i + a[i__6].i * + q__2.r = a[i__6].r * q__3.r - a[i__6].i * q__3.i, + q__2.i = a[i__6].r * q__3.i + a[i__6].i * q__3.r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) - * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 + * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L160: */ } @@ -5429,16 +5439,16 @@ static integer c_n1 = -1; i__6 = k + i__ * a_dim1; i__7 = j + k * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[ - i__7].i, q__2.i = a[i__6].r * b[i__7].i + + i__7].i, q__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; - q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) - * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 + * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L180: */ } @@ -5451,17 +5461,17 @@ static integer c_n1 = -1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; - q__2.r = alpha->r * ct[i__4].r - alpha->i * ct[i__4].i, q__2.i = + q__2.r = alpha->r * ct[i__4].r - alpha->i * ct[i__4].i, q__2.i = alpha->r * ct[i__4].i + alpha->i * ct[i__4].r; i__5 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__5].r - beta->i * c__[i__5].i, q__3.i = + q__3.r = beta->r * c__[i__5].r - beta->i * c__[i__5].i, q__3.i = beta->r * c__[i__5].i + beta->i * c__[i__5].r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; ct[i__3].r = q__1.r, ct[i__3].i = q__1.i; i__3 = i__ + j * c_dim1; - g[i__] = ((r__1 = alpha->r, abs(r__1)) + (r__2 = r_imag(alpha), + g[i__] = ((r__1 = alpha->r, abs(r__1)) + (r__2 = r_imag(alpha), abs(r__2))) * g[i__] + ((r__3 = beta->r, abs(r__3)) + ( - r__4 = r_imag(beta), abs(r__4))) * ((r__5 = c__[i__3].r, + r__4 = r_imag(beta), abs(r__4))) * ((r__5 = c__[i__3].r, abs(r__5)) + (r__6 = r_imag(&c__[i__ + j * c_dim1]), abs( r__6))); /* L200: */ @@ -5772,7 +5782,7 @@ real sdiff_(real *x, real *y) } /* sdiff_ */ -/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, +/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ diff --git a/blastest/src/dblat1.c b/blastest/src/dblat1.c index 14665d844f..e848671787 100644 --- a/blastest/src/dblat1.c +++ b/blastest/src/dblat1.c @@ -70,6 +70,11 @@ static real c_b81 = 0.f; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "dblat1"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ static doublereal sfac = 9.765625e-4; @@ -85,7 +90,7 @@ static real c_b81 = 0.f; /* Local variables */ integer ic; - extern /* Subroutine */ int check0_(doublereal *), check1_(doublereal *), + extern /* Subroutine */ int check0_(doublereal *), check1_(doublereal *), check2_(doublereal *), check3_(doublereal *), header_(void); /* Fortran I/O blocks */ @@ -124,11 +129,11 @@ static real c_b81 = 0.f; combla_1.incy = 9999; if (combla_1.icase == 3 || combla_1.icase == 11) { check0_(&sfac); - } else if (combla_1.icase == 7 || combla_1.icase == 8 || + } else if (combla_1.icase == 7 || combla_1.icase == 8 || combla_1.icase == 9 || combla_1.icase == 10) { check1_(&sfac); - } else if (combla_1.icase == 1 || combla_1.icase == 2 || - combla_1.icase == 5 || combla_1.icase == 6 || combla_1.icase + } else if (combla_1.icase == 1 || combla_1.icase == 2 || + combla_1.icase == 5 || combla_1.icase == 6 || combla_1.icase == 12 || combla_1.icase == 13) { check2_(&sfac); } else if (combla_1.icase == 4) { @@ -143,7 +148,12 @@ static real c_b81 = 0.f; } s_stop("", (ftnlen)0); - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ /* Subroutine */ int header_(void) @@ -201,17 +211,17 @@ static real c_b81 = 0.f; static doublereal dc1[8] = { .6,.8,-.6,.8,.6,1.,0.,1. }; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k; doublereal sa, sb, sc, ss, dtemp[9]; - extern /* Subroutine */ int drotg_(doublereal *, doublereal *, doublereal - *, doublereal *), stest_(integer *, doublereal *, doublereal *, - doublereal *, doublereal *), stest1_(doublereal *, doublereal *, - doublereal *, doublereal *), drotmg_(doublereal *, doublereal *, + extern /* Subroutine */ int drotg_(doublereal *, doublereal *, doublereal + *, doublereal *), stest_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *), stest1_(doublereal *, doublereal *, + doublereal *, doublereal *), drotmg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); /* Fortran I/O blocks */ @@ -319,7 +329,7 @@ static real c_b81 = 0.f; doublereal d__1; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -328,12 +338,12 @@ static real c_b81 = 0.f; doublereal sx[8]; integer np1, len; extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern doublereal dasum_(integer *, doublereal *, integer *); doublereal stemp[1], strue[8]; - extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, - doublereal *, doublereal *), itest1_(integer *, integer *), + extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *), itest1_(integer *, integer *), stest1_(doublereal *, doublereal *, doublereal *, doublereal *); extern integer idamax_(integer *, doublereal *, integer *); @@ -375,11 +385,11 @@ static real c_b81 = 0.f; stest1_(&d__1, stemp, stemp, sfac); } else if (combla_1.icase == 9) { /* .. DSCAL .. */ - dscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], + dscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], sx, &combla_1.incx); i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { - strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << + strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L40: */ } @@ -446,71 +456,71 @@ static real c_b81 = 0.f; -3.,-4.,5.,0.,0.,2.,-3.,0.,1.,5.,2.,0.,-4. }; static struct { doublereal e_1[448]; - } equiv_3 = {{ .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., - .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, - 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., - 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, 0., - 0., 0., 0., 0., -.8, 3.8, 0., 0., 0., 0., 0., -.9, 2.8, 0., + } equiv_3 = {{ .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., + .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, + 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., + 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, 0., + 0., 0., 0., 0., -.8, 3.8, 0., 0., 0., 0., 0., -.9, 2.8, 0., 0., 0., 0., 0., 3.5, -.4, 0., 0., 0., 0., 0., .6, .1, -.5, .8, 0., 0., 0., -.8, 3.8, -2.2, -1.2, 0., 0., 0., -.9, 2.8, -1.4, - -1.3, 0., 0., 0., 3.5, -.4, -2.2, 4.7, 0., 0., 0., .6, 0., - 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., - 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., - 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., - 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, -.5, 0., 0., 0., + -1.3, 0., 0., 0., 3.5, -.4, -2.2, 4.7, 0., 0., 0., .6, 0., + 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., + 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., + 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., + 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, -.5, 0., 0., 0., 0., 0., .1, -3., 0., 0., 0., 0., -.3, .1, -2., 0., 0., 0., 0., - 3.3, .1, -2., 0., 0., 0., 0., .6, .1, -.5, .8, .9, -.3, -.4, - -2., .1, 1.4, .8, .6, -.3, -2.8, -1.8, .1, 1.3, .8, 0., -.3, - -1.9, 3.8, .1, -3.1, .8, 4.8, -.3, -1.5, .6, 0., 0., 0., 0., - 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., - 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., + 3.3, .1, -2., 0., 0., 0., 0., .6, .1, -.5, .8, .9, -.3, -.4, + -2., .1, 1.4, .8, .6, -.3, -2.8, -1.8, .1, 1.3, .8, 0., -.3, + -1.9, 3.8, .1, -3.1, .8, 4.8, -.3, -1.5, .6, 0., 0., 0., 0., + 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., + 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, -.5, 0., 0., 0., 0., 4.8, .1, - -3., 0., 0., 0., 0., 3.3, .1, -2., 0., 0., 0., 0., 2.1, .1, - -2., 0., 0., 0., 0., .6, .1, -.5, .8, .9, -.3, -.4, -1.6, .1, - -2.2, .8, 5.4, -.3, -2.8, -1.5, .1, -1.4, .8, 3.6, -.3, -1.9, + -3., 0., 0., 0., 0., 3.3, .1, -2., 0., 0., 0., 0., 2.1, .1, + -2., 0., 0., 0., 0., .6, .1, -.5, .8, .9, -.3, -.4, -1.6, .1, + -2.2, .8, 5.4, -.3, -2.8, -1.5, .1, -1.4, .8, 3.6, -.3, -1.9, 3.7, .1, -2.2, .8, 3.6, -.3, -1.5, .6, 0., 0., 0., 0., 0., 0., - .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, - 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., - 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., - 0., 0., 0., 0., .6, .1, 0., 0., 0., 0., 0., -.8, -1., 0., 0., + .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, + 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., + 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., + 0., 0., 0., 0., .6, .1, 0., 0., 0., 0., 0., -.8, -1., 0., 0., 0., 0., 0., -.9, -.8, 0., 0., 0., 0., 0., 3.5, .8, 0., 0., 0., 0., 0., .6, .1, -.5, .8, 0., 0., 0., -.8, -1., 1.4, -1.6, 0., - 0., 0., -.9, -.8, 1.3, -1.6, 0., 0., 0., 3.5, .8, -3.1, 4.8, + 0., 0., -.9, -.8, 1.3, -1.6, 0., 0., 0., 3.5, .8, -3.1, 4.8, 0., 0., 0. }}; static struct { doublereal e_1[448]; - } equiv_7 = {{ .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., - .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, - 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., + } equiv_7 = {{ .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., + .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, + 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, 0., - 0., 0., 0., 0., .7, -4.8, 0., 0., 0., 0., 0., 1.7, -.7, 0., - 0., 0., 0., 0., -2.6, 3.5, 0., 0., 0., 0., 0., .5, -.9, .3, - .7, 0., 0., 0., .7, -4.8, 3., 1.1, 0., 0., 0., 1.7, -.7, -.7, + 0., 0., 0., 0., .7, -4.8, 0., 0., 0., 0., 0., 1.7, -.7, 0., + 0., 0., 0., 0., -2.6, 3.5, 0., 0., 0., 0., 0., .5, -.9, .3, + .7, 0., 0., 0., .7, -4.8, 3., 1.1, 0., 0., 0., 1.7, -.7, -.7, 2.3, 0., 0., 0., -2.6, 3.5, -.7, -3.6, 0., 0., 0., .5, 0., 0., - 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., - 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., - 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., + 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., + 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., + 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, .3, 0., 0., 0., 0., - 4., -.9, -.3, 0., 0., 0., 0., -.5, -.9, 1.5, 0., 0., 0., 0., + 4., -.9, -.3, 0., 0., 0., 0., -.5, -.9, 1.5, 0., 0., 0., 0., -1.5, -.9, -1.8, 0., 0., 0., 0., .5, -.9, .3, .7, -.6, .2, .8, - 3.7, -.9, -1.2, .7, -1.5, .2, 2.2, -.3, -.9, 2.1, .7, -1.6, - .2, 2., -1.6, -.9, -2.1, .7, 2.9, .2, -3.8, .5, 0., 0., 0., - 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., - 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., - 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., + 3.7, -.9, -1.2, .7, -1.5, .2, 2.2, -.3, -.9, 2.1, .7, -1.6, + .2, 2., -1.6, -.9, -2.1, .7, 2.9, .2, -3.8, .5, 0., 0., 0., + 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., + 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., + 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, 0., 0., 0., 0., 0., 4., - -6.3, 0., 0., 0., 0., 0., -.5, .3, 0., 0., 0., 0., 0., -1.5, - 3., 0., 0., 0., 0., 0., .5, -.9, .3, .7, 0., 0., 0., 3.7, - -7.2, 3., 1.7, 0., 0., 0., -.3, .9, -.7, 1.9, 0., 0., 0., - -1.6, 2.7, -.7, -3.4, 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., - .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, - 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., + -6.3, 0., 0., 0., 0., 0., -.5, .3, 0., 0., 0., 0., 0., -1.5, + 3., 0., 0., 0., 0., 0., .5, -.9, .3, .7, 0., 0., 0., 3.7, + -7.2, 3., 1.7, 0., 0., 0., -.3, .9, -.7, 1.9, 0., 0., 0., + -1.6, 2.7, -.7, -3.4, 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., + .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, + 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., - 0., 0., 0., 0., .5, -.9, .3, 0., 0., 0., 0., .7, -.9, 1.2, + 0., 0., 0., 0., .5, -.9, .3, 0., 0., 0., 0., .7, -.9, 1.2, 0., 0., 0., 0., 1.7, -.9, .5, 0., 0., 0., 0., -2.6, -.9, -1.3, - 0., 0., 0., 0., .5, -.9, .3, .7, -.6, .2, .8, .7, -.9, 1.2, + 0., 0., 0., 0., .5, -.9, .3, .7, -.6, .2, .8, .7, -.9, 1.2, .7, -1.5, .2, 1.6, 1.7, -.9, .5, .7, -1.6, .2, 2.4, -2.6, -.9, -1.3, .7, 2.9, .2, -4. }}; @@ -521,7 +531,7 @@ static real c_b81 = 0.f; doublereal d__1; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -532,7 +542,7 @@ static real c_b81 = 0.f; doublereal sx[7], sy[7]; integer kni; doublereal stx[7], sty[7]; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer kpar, lenx, leny; #define dt19x ((doublereal *)&equiv_3) @@ -547,16 +557,16 @@ static real c_b81 = 0.f; #define dt19yc ((doublereal *)&equiv_7 + 224) #define dt19yd ((doublereal *)&equiv_7 + 336) extern doublereal dsdot_(integer *, real *, integer *, real *, integer *); - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer ksize; - extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), drotm_(integer *, doublereal + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), drotm_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *), dswap_( integer *, doublereal *, integer *, doublereal *, integer *); doublereal ssize[7]; - extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, - doublereal *, doublereal *), stest1_(doublereal *, doublereal *, + extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *), stest1_(doublereal *, doublereal *, doublereal *, doublereal *); /* Fortran I/O blocks */ @@ -616,7 +626,7 @@ static real c_b81 = 0.f; /* .. DDOT .. */ d__1 = ddot_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy); - stest1_(&d__1, &dt7[kn + (ki << 2) - 5], &ssize1[kn - 1], + stest1_(&d__1, &dt7[kn + (ki << 2) - 5], &ssize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. DAXPY .. */ @@ -653,9 +663,9 @@ static real c_b81 = 0.f; for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; - stx[i__ - 1] = dt19x[i__ + (kpar + (kni << 2)) * 7 - + stx[i__ - 1] = dt19x[i__ + (kpar + (kni << 2)) * 7 - 36]; - sty[i__ - 1] = dt19y[i__ + (kpar + (kni << 2)) * 7 - + sty[i__ - 1] = dt19y[i__ + (kpar + (kni << 2)) * 7 - 36]; } @@ -746,7 +756,7 @@ static real c_b81 = 0.f; 1.17,1.17,1.17,1.17,1.17,1.17,1.17 }; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -755,13 +765,13 @@ static real c_b81 = 0.f; doublereal sx[7], sy[7], stx[7], sty[7]; integer lenx, leny; doublereal mwpc[11]; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer mwpn[11]; doublereal mwps[11], mwpx[5], mwpy[5]; integer ksize; doublereal copyx[5], copyy[5]; - extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, + extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal mwptx[55] /* was [11][5] */, mwpty[55] /* was [11][5] */; @@ -1090,11 +1100,11 @@ static real c_b81 = 0.f; } /* testdsdot_ */ -/* Subroutine */ int stest1_(doublereal *scomp1, doublereal *strue1, +/* Subroutine */ int stest1_(doublereal *scomp1, doublereal *strue1, doublereal *ssize, doublereal *sfac) { doublereal scomp[1], strue[1]; - extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, + extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* ************************* STEST1 ***************************** */ diff --git a/blastest/src/dblat2.c b/blastest/src/dblat2.c index 0cdc8f16f3..7982c67c50 100644 --- a/blastest/src/dblat2.c +++ b/blastest/src/dblat2.c @@ -155,10 +155,15 @@ static logical c_false = FALSE_; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "dblat2"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ - static char snames[6*16] = "DGEMV " "DGBMV " "DSYMV " "DSBMV " "DSPMV " - "DTRMV " "DTBMV " "DTPMV " "DTRSV " "DTBSV " "DTPSV " "DGER " + static char snames[6*16] = "DGEMV " "DGBMV " "DSYMV " "DSBMV " "DSPMV " + "DTRMV " "DTBMV " "DTPMV " "DTRSV " "DTBSV " "DTPSV " "DGER " "DSYR " "DSPR " "DSYR2 " "DSPR2 "; /* Format strings */ @@ -204,10 +209,10 @@ static logical c_false = FALSE_; cllist cl__1; /* Builtin functions */ - integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, - char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), - s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, + char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), + s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); @@ -227,50 +232,50 @@ static logical c_false = FALSE_; integer ninc, nbet, ntra; logical rewi; integer nout; - extern /* Subroutine */ int dchk1_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, - integer *, integer *, integer *, integer *, doublereal *, integer - *, doublereal *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, ftnlen), dchk2_(char *, - doublereal *, doublereal *, integer *, integer *, logical *, - logical *, logical *, integer *, integer *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - integer *, integer *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, ftnlen), dchk3_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, - integer *, integer *, integer *, integer *, integer *, integer *, + extern /* Subroutine */ int dchk1_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, + integer *, integer *, integer *, integer *, doublereal *, integer + *, doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, ftnlen), dchk2_(char *, + doublereal *, doublereal *, integer *, integer *, logical *, + logical *, logical *, integer *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, ftnlen), dchk3_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, + integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, ftnlen), dchk4_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, - integer *, integer *, doublereal *, integer *, integer *, integer - *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, ftnlen), dchk5_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, - integer *, integer *, doublereal *, integer *, integer *, integer - *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, ftnlen), dchk6_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, - integer *, integer *, doublereal *, integer *, integer *, integer - *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, ftnlen), dchke_(integer *, char *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, ftnlen), dchk4_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, ftnlen), dchk5_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, ftnlen), dchk6_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, ftnlen), dchke_(integer *, char *, integer *, ftnlen); logical fatal, trace; integer nidim; - extern /* Subroutine */ int dmvch_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmvch_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *, integer *, + doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; @@ -621,7 +626,7 @@ static logical c_false = FALSE_; goto L80; } for (i__ = 1; i__ <= 16; ++i__) { - if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) + if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } @@ -668,7 +673,7 @@ static logical c_false = FALSE_; } i__1 = n; for (j = 1; j <= i__1; ++j) { - yy[j - 1] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - + yy[j - 1] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3); /* L130: */ } @@ -748,44 +753,44 @@ static logical c_false = FALSE_; /* Test DGEMV, 01, and DGBMV, 02. */ L140: dchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, - &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, + &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test DSYMV, 03, DSBMV, 04, and DSPMV, 05. */ L150: dchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, - &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, + &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test DTRMV, 06, DTBMV, 07, DTPMV, 08, */ /* DTRSV, 09, DTBSV, 10, and DTPSV, 11. */ L160: dchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test DGER, 12. */ L170: dchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test DSYR, 13, and DSPR, 14. */ L180: dchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test DSYR2, 15, and DSPR2, 16. */ L190: dchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: @@ -827,16 +832,21 @@ static logical c_false = FALSE_; /* End of DBLAT2. */ - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ -/* Subroutine */ int dchk1_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, - integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublereal *a, doublereal *aa, doublereal *as, doublereal *x, - doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, + fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, + integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublereal *a, doublereal *aa, doublereal *as, doublereal *x, + doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, ftnlen sname_len) { /* Initialized data */ @@ -881,21 +891,21 @@ static logical c_false = FALSE_; logical same; integer incx, incy; logical full, tran, null; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * - , integer *, doublereal *, doublereal *, integer *, doublereal *, + , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dgemv_( - char *, integer *, integer *, doublereal *, doublereal *, integer + char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, - ftnlen), dmvch_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, + ftnlen), dmvch_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, logical *, integer *, logical *, + doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; @@ -1079,9 +1089,9 @@ static logical c_false = FALSE_; transl = 0.; i__7 = abs(incy); i__8 = ml - 1; - dmake_("GE", " ", " ", &c__1, &ml, &y[1], + dmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & - i__8, &reset, &transl, (ftnlen)2, + i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; @@ -1089,7 +1099,7 @@ static logical c_false = FALSE_; /* Save every datum before calling the */ /* subroutine. */ - *(unsigned char *)transs = *(unsigned + *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; @@ -1149,7 +1159,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - dgemv_(trans, &m, &n, &alpha, &aa[1], + dgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { @@ -1276,8 +1286,8 @@ static logical c_false = FALSE_; dmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, - &beta, &y[1], &incy, &yt[1], - &g[1], &yy[1], eps, &err, + &beta, &y[1], &incy, &yt[1], + &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); @@ -1381,13 +1391,13 @@ static logical c_false = FALSE_; } /* dchk1_ */ -/* Subroutine */ int dchk2_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, - integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublereal *a, doublereal *aa, doublereal *as, doublereal *x, - doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, + fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, + integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublereal *a, doublereal *aa, doublereal *as, doublereal *x, + doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, ftnlen sname_len) { /* Initialized data */ @@ -1425,7 +1435,7 @@ static logical c_false = FALSE_; f_rew(alist *); /* Local variables */ - integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, + integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, bls, err, beta; @@ -1434,29 +1444,29 @@ static logical c_false = FALSE_; integer incx, incy; logical full, null; char uplo[1]; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; - extern /* Subroutine */ int dmvch_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmvch_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *, integer *, + doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; - extern /* Subroutine */ int dsbmv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dsbmv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); logical reset; integer incxs, incys; - extern /* Subroutine */ int dspmv_(char *, integer *, doublereal *, + extern /* Subroutine */ int dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); char uplos[1]; - extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); logical banded, packed; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, @@ -1619,7 +1629,7 @@ static logical c_false = FALSE_; i__8 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & - reset, &transl, (ftnlen)2, (ftnlen)1, + reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; @@ -1836,8 +1846,8 @@ static logical c_false = FALSE_; /* Check the result. */ - dmvch_("N", &n, &n, &alpha, &a[a_offset], - nmax, &x[1], &incx, &beta, &y[1], + dmvch_("N", &n, &n, &alpha, &a[a_offset], + nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); @@ -1947,12 +1957,12 @@ static logical c_false = FALSE_; } /* dchk2_ */ -/* Subroutine */ int dchk3_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublereal *a, doublereal *aa, doublereal *as, doublereal *x, - doublereal *xx, doublereal *xs, doublereal *xt, doublereal *g, + fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublereal *a, doublereal *aa, doublereal *as, doublereal *x, + doublereal *xx, doublereal *xs, doublereal *xt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Initialized data */ @@ -2002,36 +2012,36 @@ static logical c_false = FALSE_; integer incx; logical full, null; char uplo[1]; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); char diags[1]; logical isame[13]; - extern /* Subroutine */ int dmvch_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmvch_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *, integer *, + doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; - extern /* Subroutine */ int dtbmv_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dtbmv_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); logical reset; - extern /* Subroutine */ int dtbsv_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dtbsv_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); integer incxs; char trans[1]; - extern /* Subroutine */ int dtpmv_(char *, char *, char *, integer *, - doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), + extern /* Subroutine */ int dtpmv_(char *, char *, char *, integer *, + doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, - doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtpsv_(char *, - char *, char *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtpsv_(char *, + char *, char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; - extern /* Subroutine */ int dtrsv_(char *, char *, char *, integer *, - doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, + extern /* Subroutine */ int dtrsv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, @@ -2160,13 +2170,13 @@ static logical c_false = FALSE_; ; for (icd = 1; icd <= 2; ++icd) { - *(unsigned char *)diag = *(unsigned char *)&ichd[icd + *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl = 0.; - dmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], + dmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2213,7 +2223,7 @@ static logical c_false = FALSE_; /* Call the subroutine. */ - if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) + if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { @@ -2266,7 +2276,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - dtbmv_(uplo, trans, diag, &n, &k, &aa[1], + dtbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { @@ -2347,7 +2357,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - dtbsv_(uplo, trans, diag, &n, &k, &aa[1], + dtbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { @@ -2389,11 +2399,11 @@ static logical c_false = FALSE_; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplo == *(unsigned + isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; - isame[1] = *(unsigned char *)trans == *(unsigned + isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; - isame[2] = *(unsigned char *)diag == *(unsigned + isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { @@ -2464,7 +2474,7 @@ static logical c_false = FALSE_; dmvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &x[1], &incx, & c_b120, &z__[1], &incx, &xt[1], & - g[1], &xx[1], eps, &err, fatal, + g[1], &xx[1], eps, &err, fatal, nout, &c_true, (ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { @@ -2473,7 +2483,7 @@ static logical c_false = FALSE_; i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { - z__[i__] = xx[(i__ - 1) * abs(incx) + + z__[i__] = xx[(i__ - 1) * abs(incx) + 1]; xx[(i__ - 1) * abs(incx) + 1] = x[i__] ; @@ -2482,7 +2492,7 @@ static logical c_false = FALSE_; dmvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &z__[1], &incx, & c_b120, &x[1], &incx, &xt[1], &g[ - 1], &xx[1], eps, &err, fatal, + 1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); @@ -2584,13 +2594,13 @@ static logical c_false = FALSE_; } /* dchk3_ */ -/* Subroutine */ int dchk4_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublereal *a, doublereal *aa, doublereal *as, doublereal *x, - doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, - doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, + fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublereal *a, doublereal *aa, doublereal *as, doublereal *x, + doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, + doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Format strings */ @@ -2625,23 +2635,23 @@ static logical c_false = FALSE_; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, err; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); integer ldas; logical same; integer incx, incy; logical null; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; - extern /* Subroutine */ int dmvch_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmvch_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *, integer *, + doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; @@ -2748,7 +2758,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = m - 1; dmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { x[m / 2] = 0.; @@ -2782,7 +2792,7 @@ static logical c_false = FALSE_; transl = 0.; i__5 = m - 1; i__6 = n - 1; - dmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], + dmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2913,9 +2923,9 @@ static logical c_false = FALSE_; } else { w[0] = y[n - j + 1]; } - dmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, + dmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b128, &a[j * a_dim1 + 1], - &c__1, &yt[1], &g[1], &aa[(j - 1) * + &c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); @@ -2995,13 +3005,13 @@ static logical c_false = FALSE_; } /* dchk4_ */ -/* Subroutine */ int dchk5_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublereal *a, doublereal *aa, doublereal *as, doublereal *x, - doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, - doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, + fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublereal *a, doublereal *aa, doublereal *as, doublereal *x, + doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, + doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Initialized data */ @@ -3047,21 +3057,21 @@ static logical c_false = FALSE_; logical same; integer incx; logical full; - extern /* Subroutine */ int dspr_(char *, integer *, doublereal *, + extern /* Subroutine */ int dspr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, ftnlen); logical null; char uplo[1]; - extern /* Subroutine */ int dsyr_(char *, integer *, doublereal *, + extern /* Subroutine */ int dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen), dmake_( - char *, char *, char *, integer *, integer *, doublereal *, - integer *, doublereal *, integer *, integer *, integer *, logical + char *, char *, char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; - extern /* Subroutine */ int dmvch_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmvch_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *, integer *, + doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; @@ -3173,7 +3183,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.; @@ -3342,9 +3352,9 @@ static logical c_false = FALSE_; jj = j; lj = n - j + 1; } - dmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, + dmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b128, &a[jj + j * a_dim1], & - c__1, &yt[1], &g[1], &aa[ja], eps, &err, + c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { @@ -3442,13 +3452,13 @@ static logical c_false = FALSE_; } /* dchk5_ */ -/* Subroutine */ int dchk6_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk6_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublereal *a, doublereal *aa, doublereal *as, doublereal *x, - doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, - doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, + fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublereal *a, doublereal *aa, doublereal *as, doublereal *x, + doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, + doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Initialized data */ @@ -3477,7 +3487,7 @@ static logical c_false = FALSE_; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; @@ -3496,19 +3506,19 @@ static logical c_false = FALSE_; integer incx, incy; logical full, null; char uplo[1]; - extern /* Subroutine */ int dspr2_(char *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - ftnlen), dsyr2_(char *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - ftnlen), dmake_(char *, char *, char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, integer *, + extern /* Subroutine */ int dspr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + ftnlen), dsyr2_(char *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), dmake_(char *, char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; - extern /* Subroutine */ int dmvch_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmvch_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *, integer *, + doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; @@ -3622,7 +3632,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.; @@ -3657,7 +3667,7 @@ static logical c_false = FALSE_; transl = 0.; i__5 = n - 1; i__6 = n - 1; - dmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], + dmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -3835,7 +3845,7 @@ static logical c_false = FALSE_; jj = j; lj = n - j + 1; } - dmvch_("N", &lj, &c__2, &alpha, &z__[jj + + dmvch_("N", &lj, &c__2, &alpha, &z__[jj + z_dim1], nmax, w, &c__1, &c_b128, &a[ jj + j * a_dim1], &c__1, &yt[1], &g[1] , &aa[ja], eps, &err, fatal, nout, & @@ -3941,7 +3951,7 @@ static logical c_false = FALSE_; } /* dchk6_ */ -/* Subroutine */ int dchke_(integer *isnum, char *srnamt, integer *nout, +/* Subroutine */ int dchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ @@ -3955,39 +3965,39 @@ static logical c_false = FALSE_; /* Local variables */ doublereal a[1] /* was [1][1] */, x[1], y[1], beta; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *), dspr_(char *, integer *, doublereal *, doublereal *, - integer *, doublereal *, ftnlen), dsyr_(char *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - ftnlen), dspr2_(char *, integer *, doublereal *, doublereal *, + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dspr_(char *, integer *, doublereal *, doublereal *, + integer *, doublereal *, ftnlen), dsyr_(char *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + ftnlen), dspr2_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, ftnlen), dsyr2_( - char *, integer *, doublereal *, doublereal *, integer *, + char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen); doublereal alpha; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * - , integer *, doublereal *, doublereal *, integer *, doublereal *, + , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dgemv_( - char *, integer *, integer *, doublereal *, doublereal *, integer + char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, - ftnlen), dsbmv_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, ftnlen), dtbmv_(char *, char *, char *, - integer *, integer *, doublereal *, integer *, doublereal *, + ftnlen), dsbmv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, ftnlen), dtbmv_(char *, char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtbsv_(char *, char *, char *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, ftnlen, ftnlen, ftnlen), dspmv_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen, ftnlen, ftnlen), dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, integer *, ftnlen), dtpmv_(char *, char *, char *, - integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, - ftnlen), dtrmv_(char *, char *, char *, integer *, doublereal *, - integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), - dtpsv_(char *, char *, char *, integer *, doublereal *, - doublereal *, integer *, ftnlen, ftnlen, ftnlen), dsymv_(char *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *, ftnlen), dtpmv_(char *, char *, char *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, + ftnlen), dtrmv_(char *, char *, char *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), + dtpsv_(char *, char *, char *, integer *, doublereal *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen), dsymv_(char *, + integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dtrsv_( - char *, char *, char *, integer *, doublereal *, integer *, - doublereal *, integer *, ftnlen, ftnlen, ftnlen), chkxer_(char *, + char *, char *, char *, integer *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ @@ -4493,9 +4503,9 @@ static logical c_false = FALSE_; } /* dchke_ */ -/* Subroutine */ int dmake_(char *type__, char *uplo, char *diag, integer *m, +/* Subroutine */ int dmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *nmax, doublereal *aa, integer * - lda, integer *kl, integer *ku, logical *reset, doublereal *transl, + lda, integer *kl, integer *ku, logical *reset, doublereal *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ @@ -4553,7 +4563,7 @@ static logical c_false = FALSE_; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { - if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) + if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { a[i__ + j * a_dim1] = dbeg_(reset) + *transl; } else { @@ -4728,9 +4738,9 @@ static logical c_false = FALSE_; } /* dmake_ */ /* Subroutine */ int dmvch_(char *trans, integer *m, integer *n, doublereal * - alpha, doublereal *a, integer *nmax, doublereal *x, integer *incx, - doublereal *beta, doublereal *y, integer *incy, doublereal *yt, - doublereal *g, doublereal *yy, doublereal *eps, doublereal *err, + alpha, doublereal *a, integer *nmax, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy, doublereal *yt, + doublereal *g, doublereal *yy, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ @@ -4845,7 +4855,7 @@ static logical c_false = FALSE_; *err = 0.; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { - erri = (d__1 = yt[i__] - yy[(i__ - 1) * abs(*incy) + 1], abs(d__1)) / + erri = (d__1 = yt[i__] - yy[(i__ - 1) * abs(*incy) + 1], abs(d__1)) / *eps; if (g[i__] != 0.) { erri /= g[i__]; @@ -5102,7 +5112,7 @@ doublereal ddiff_(doublereal *x, doublereal *y) } /* ddiff_ */ -/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, +/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ diff --git a/blastest/src/dblat3.c b/blastest/src/dblat3.c index d7a85e29c1..b4698f56cb 100644 --- a/blastest/src/dblat3.c +++ b/blastest/src/dblat3.c @@ -135,9 +135,14 @@ static integer c__2 = 2; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "dblat3"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ - static char snames[6*6] = "DGEMM " "DSYMM " "DTRMM " "DTRSM " "DSYRK " + static char snames[6*6] = "DGEMM " "DSYMM " "DTRMM " "DTRSM " "DSYRK " "DSYR2K"; /* Format strings */ @@ -179,10 +184,10 @@ static integer c__2 = 2; cllist cl__1; /* Builtin functions */ - integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, - char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), - s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, + char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), + s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); @@ -200,38 +205,38 @@ static integer c__2 = 2; integer nbet, ntra; logical rewi; integer nout; - extern /* Subroutine */ int dchk1_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dchk1_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, ftnlen), dchk2_(char *, - doublereal *, doublereal *, integer *, integer *, logical *, - logical *, logical *, integer *, integer *, integer *, doublereal + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, ftnlen), dchk2_(char *, + doublereal *, doublereal *, integer *, integer *, logical *, + logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, ftnlen), dchk3_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), - dchk4_(char *, doublereal *, doublereal *, integer *, integer *, - logical *, logical *, logical *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, ftnlen), dchk5_(char *, doublereal *, - doublereal *, integer *, integer *, logical *, logical *, logical - *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, ftnlen), dchk3_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), + dchk4_(char *, doublereal *, doublereal *, integer *, integer *, + logical *, logical *, logical *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, ftnlen), dchk5_(char *, doublereal *, + doublereal *, integer *, integer *, logical *, logical *, logical + *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchke_(integer *, char *, integer *, ftnlen); logical fatal; - extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical trace; @@ -506,7 +511,7 @@ static integer c__2 = 2; goto L60; } for (i__ = 1; i__ <= 6; ++i__) { - if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) + if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } @@ -554,7 +559,7 @@ static integer c__2 = 2; } i__1 = n; for (j = 1; j <= i__1; ++j) { - cc[j - 1] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - + cc[j - 1] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3); /* L110: */ } @@ -599,7 +604,7 @@ static integer c__2 = 2; } i__1 = n; for (j = 1; j <= i__1; ++j) { - cc[n - j] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - + cc[n - j] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3); /* L130: */ } @@ -672,34 +677,34 @@ static integer c__2 = 2; /* Test DGEMM, 01. */ L140: dchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test DSYMM, 02. */ L150: dchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test DTRMM, 03, DTRSM, 04. */ L160: dchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test DSYRK, 05. */ L170: dchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test DSYR2K, 06. */ L180: dchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; @@ -743,15 +748,20 @@ static integer c__2 = 2; /* End of DBLAT3. */ - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ -/* Subroutine */ int dchk1_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, - integer *nbet, doublereal *bet, integer *nmax, doublereal *a, - doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, - doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, + fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, + integer *nbet, doublereal *bet, integer *nmax, doublereal *a, + doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, + doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, ftnlen sname_len) { /* Initialized data */ @@ -775,7 +785,7 @@ static integer c__2 = 2; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; @@ -784,22 +794,22 @@ static integer c__2 = 2; f_rew(alist *); /* Local variables */ - integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, + integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, bls, err, beta; integer ldas, ldbs, ldcs; logical same, null; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; - extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, - logical *, integer *, logical *, ftnlen, ftnlen), dgemm_(char *, - char *, integer *, integer *, integer *, doublereal *, doublereal + logical *, integer *, logical *, ftnlen, ftnlen), dgemm_(char *, + char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); logical isame[13], trana, tranb; @@ -898,7 +908,7 @@ static integer c__2 = 2; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; - trana = *(unsigned char *)transa == 'T' || *(unsigned + trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { @@ -926,9 +936,9 @@ static integer c__2 = 2; ftnlen)1); for (icb = 1; icb <= 3; ++icb) { - *(unsigned char *)transb = *(unsigned char *)&ich[icb + *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; - tranb = *(unsigned char *)transb == 'T' || *(unsigned + tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { @@ -1100,9 +1110,9 @@ static integer c__2 = 2; dmmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], - nmax, &beta, &c__[c_offset], + nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, - eps, &err, fatal, nout, &c_true, + eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ @@ -1183,12 +1193,12 @@ static integer c__2 = 2; } /* dchk1_ */ -/* Subroutine */ int dchk2_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, - integer *nbet, doublereal *bet, integer *nmax, doublereal *a, - doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, - doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, + fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, + integer *nbet, doublereal *bet, integer *nmax, doublereal *a, + doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, + doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, ftnlen sname_len) { /* Initialized data */ @@ -1213,7 +1223,7 @@ static integer c__2 = 2; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; @@ -1222,7 +1232,7 @@ static integer c__2 = 2; f_rew(alist *); /* Local variables */ - integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, + integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc; extern logical lde_(doublereal *, doublereal *, integer *); integer ics; @@ -1234,21 +1244,21 @@ static integer c__2 = 2; char side[1]; logical left, null; char uplo[1]; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; - extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; char sides[1]; integer nargs; logical reset; - extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); char uplos[1]; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, @@ -1391,7 +1401,7 @@ static integer c__2 = 2; /* Generate the matrix C. */ - dmake_("GE", " ", " ", &m, &n, &c__[c_offset], + dmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -1472,9 +1482,9 @@ static integer c__2 = 2; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)sides == *(unsigned + isame[0] = *(unsigned char *)sides == *(unsigned char *)side; - isame[1] = *(unsigned char *)uplos == *(unsigned + isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; @@ -1519,14 +1529,14 @@ static integer c__2 = 2; if (left) { dmmch_("N", "N", &m, &n, &m, &alpha, &a[ - a_offset], nmax, &b[b_offset], + a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { dmmch_("N", "N", &m, &n, &n, &alpha, &b[ - b_offset], nmax, &a[a_offset], + b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( @@ -1606,11 +1616,11 @@ static integer c__2 = 2; } /* dchk2_ */ -/* Subroutine */ int dchk3_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, - integer *nmax, doublereal *a, doublereal *aa, doublereal *as, - doublereal *b, doublereal *bb, doublereal *bs, doublereal *ct, + fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, + integer *nmax, doublereal *a, doublereal *aa, doublereal *as, + doublereal *b, doublereal *bb, doublereal *bs, doublereal *ct, doublereal *g, doublereal *c__, ftnlen sname_len) { /* Initialized data */ @@ -1637,7 +1647,7 @@ static integer c__2 = 2; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; @@ -1658,25 +1668,25 @@ static integer c__2 = 2; char side[1]; logical left, null; char uplo[1]; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; char diags[1]; - extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; char sides[1]; integer nargs; logical reset; - extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dtrsm_( char *, char *, char *, char *, integer *, integer *, doublereal * - , doublereal *, integer *, doublereal *, integer *, ftnlen, + , doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char uplos[1]; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, @@ -1816,7 +1826,7 @@ static integer c__2 = 2; /* Generate the matrix B. */ - dmake_("GE", " ", " ", &m, &n, &b[b_offset], + dmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -1882,7 +1892,7 @@ static integer c__2 = 2; } dtrmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, - (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { @@ -1915,7 +1925,7 @@ static integer c__2 = 2; } dtrsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, - (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } @@ -1984,18 +1994,18 @@ static integer c__2 = 2; dmmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & - c_b86, &c__[c_offset], + c_b86, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { dmmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & - c_b86, &c__[c_offset], + c_b86, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } @@ -2008,10 +2018,10 @@ static integer c__2 = 2; i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; - for (i__ = 1; i__ <= i__5; ++i__) + for (i__ = 1; i__ <= i__5; ++i__) { c__[i__ + j * c_dim1] = bb[i__ + (j - 1) * ldb]; - bb[i__ + (j - 1) * ldb] = alpha * b[i__ + j * + bb[i__ + (j - 1) * ldb] = alpha * b[i__ + j * b_dim1]; /* L60: */ } @@ -2024,16 +2034,16 @@ static integer c__2 = 2; &c__[c_offset], nmax, & c_b86, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & - ldb, eps, &err, fatal, + ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { dmmch_("N", transa, &m, &n, &n, & - c_b96, &c__[c_offset], - nmax, &a[a_offset], nmax, - &c_b86, &b[b_offset], + c_b96, &c__[c_offset], + nmax, &a[a_offset], nmax, + &c_b86, &b[b_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_false, ( ftnlen)1, (ftnlen)1); } @@ -2114,12 +2124,12 @@ static integer c__2 = 2; } /* dchk3_ */ -/* Subroutine */ int dchk4_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, - integer *nbet, doublereal *bet, integer *nmax, doublereal *a, - doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, - doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, + fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, + integer *nbet, doublereal *bet, integer *nmax, doublereal *a, + doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, + doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, ftnlen sname_len) { /* Initialized data */ @@ -2146,7 +2156,7 @@ static integer c__2 = 2; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; @@ -2166,13 +2176,13 @@ static integer c__2 = 2; doublereal bets; logical tran, null; char uplo[1]; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; - extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; @@ -2180,7 +2190,7 @@ static integer c__2 = 2; logical reset; char trans[1]; logical upper; - extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); char uplos[1]; @@ -2312,7 +2322,7 @@ static integer c__2 = 2; /* Generate the matrix C. */ - dmake_("SY", uplo, " ", &n, &n, &c__[c_offset], + dmake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2369,7 +2379,7 @@ static integer c__2 = 2; al__1.aunit = *ntra; f_rew(&al__1); } - dsyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, + dsyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, &beta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1) ; @@ -2385,9 +2395,9 @@ static integer c__2 = 2; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplos == *(unsigned + isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; - isame[1] = *(unsigned char *)transs == *(unsigned + isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; @@ -2440,19 +2450,19 @@ static integer c__2 = 2; } if (tran) { dmmch_("T", "N", &lj, &c__1, &k, & - alpha, &a[jj * a_dim1 + 1], - nmax, &a[j * a_dim1 + 1], - nmax, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + alpha, &a[jj * a_dim1 + 1], + nmax, &a[j * a_dim1 + 1], + nmax, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { dmmch_("N", "T", &lj, &c__1, &k, & - alpha, &a[jj + a_dim1], nmax, + alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & - ct[1], &g[1], &cc[jc], &ldc, + ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } @@ -2544,12 +2554,12 @@ static integer c__2 = 2; } /* dchk4_ */ -/* Subroutine */ int dchk5_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int dchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, - integer *nbet, doublereal *bet, integer *nmax, doublereal *ab, - doublereal *aa, doublereal *as, doublereal *bb, doublereal *bs, - doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, + fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, + integer *nbet, doublereal *bet, integer *nmax, doublereal *ab, + doublereal *aa, doublereal *as, doublereal *bb, doublereal *bs, + doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, doublereal *w, ftnlen sname_len) { /* Initialized data */ @@ -2597,13 +2607,13 @@ static integer c__2 = 2; doublereal bets; logical tran, null; char uplo[1]; - extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; - extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; @@ -2612,8 +2622,8 @@ static integer c__2 = 2; char trans[1]; logical upper; char uplos[1]; - extern /* Subroutine */ int dsyr2k_(char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + extern /* Subroutine */ int dsyr2k_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); @@ -2762,7 +2772,7 @@ static integer c__2 = 2; /* Generate the matrix C. */ - dmake_("SY", uplo, " ", &n, &n, &c__[c_offset], + dmake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2843,9 +2853,9 @@ static integer c__2 = 2; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplos == *(unsigned + isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; - isame[1] = *(unsigned char *)transs == *(unsigned + isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; @@ -2902,7 +2912,7 @@ static integer c__2 = 2; if (tran) { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { - w[i__] = ab[(j - 1 << 1) * *nmax + w[i__] = ab[(j - 1 << 1) * *nmax + k + i__]; w[k + i__] = ab[(j - 1 << 1) * * nmax + i__]; @@ -2913,17 +2923,17 @@ static integer c__2 = 2; i__8 = *nmax << 1; dmmch_("T", "N", &lj, &c__1, &i__6, & alpha, &ab[jjab], &i__7, &w[1] - , &i__8, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + , &i__8, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { - w[i__] = ab[(k + i__ - 1) * *nmax + w[i__] = ab[(k + i__ - 1) * *nmax + j]; - w[k + i__] = ab[(i__ - 1) * *nmax + w[k + i__] = ab[(i__ - 1) * *nmax + j]; /* L60: */ } @@ -2931,9 +2941,9 @@ static integer c__2 = 2; i__7 = *nmax << 1; dmmch_("N", "N", &lj, &c__1, &i__6, & alpha, &ab[jj], nmax, &w[1], & - i__7, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + i__7, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } @@ -3029,7 +3039,7 @@ static integer c__2 = 2; } /* dchk5_ */ -/* Subroutine */ int dchke_(integer *isnum, char *srnamt, integer *nout, +/* Subroutine */ int dchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ @@ -3042,24 +3052,24 @@ static integer c__2 = 2; integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ - doublereal a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] + doublereal a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), - dtrmm_(char *, char *, char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + dtrmm_(char *, char *, char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dsymm_(char *, char *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), - dtrsm_(char *, char *, char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, + dtrsm_(char *, char *, char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dsyrk_(char *, char *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, integer *, ftnlen, ftnlen), dsyr2k_(char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *, ftnlen, ftnlen), dsyr2k_(char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ @@ -3113,142 +3123,142 @@ static integer c__2 = 2; } L10: infoc_1.infot = 1; - dgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; - dgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - dgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - dgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - dgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - dgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - dgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - dgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - dgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - dgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - dgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - dgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - dgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - dgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - dgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - dgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - dgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - dgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - dgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, + dgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - dgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - dgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - dgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, + dgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - dgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - dgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - dgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + dgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - dgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + dgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - dgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - dgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + dgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); @@ -3952,9 +3962,9 @@ static integer c__2 = 2; } /* dchke_ */ -/* Subroutine */ int dmake_(char *type__, char *uplo, char *diag, integer *m, +/* Subroutine */ int dmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *nmax, doublereal *aa, integer * - lda, logical *reset, doublereal *transl, ftnlen type_len, ftnlen + lda, logical *reset, doublereal *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ @@ -4097,8 +4107,8 @@ static integer c__2 = 2; } /* dmake_ */ /* Subroutine */ int dmmch_(char *transa, char *transb, integer *m, integer * - n, integer *kk, doublereal *alpha, doublereal *a, integer *lda, - doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + n, integer *kk, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, doublereal *ct, doublereal *g, doublereal *cc, integer * ldcc, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) @@ -4112,7 +4122,7 @@ static integer c__2 = 2; " \002,i3)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3; doublereal d__1, d__2; @@ -4166,9 +4176,9 @@ static integer c__2 = 2; cc -= cc_offset; /* Function Body */ - trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == + trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; - tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == + tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ @@ -4190,7 +4200,7 @@ static integer c__2 = 2; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[k + j * b_dim1]; - g[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 + g[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = b[k + j * b_dim1], abs(d__2)); /* L20: */ } @@ -4202,7 +4212,7 @@ static integer c__2 = 2; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[k + j * b_dim1]; - g[i__] += (d__1 = a[k + i__ * a_dim1], abs(d__1)) * (d__2 + g[i__] += (d__1 = a[k + i__ * a_dim1], abs(d__1)) * (d__2 = b[k + j * b_dim1], abs(d__2)); /* L40: */ } @@ -4214,7 +4224,7 @@ static integer c__2 = 2; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[j + k * b_dim1]; - g[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 + g[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = b[j + k * b_dim1], abs(d__2)); /* L60: */ } @@ -4226,7 +4236,7 @@ static integer c__2 = 2; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[j + k * b_dim1]; - g[i__] += (d__1 = a[k + i__ * a_dim1], abs(d__1)) * (d__2 + g[i__] += (d__1 = a[k + i__ * a_dim1], abs(d__1)) * (d__2 = b[j + k * b_dim1], abs(d__2)); /* L80: */ } @@ -4520,7 +4530,7 @@ doublereal ddiff_(doublereal *x, doublereal *y) } /* ddiff_ */ -/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, +/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ diff --git a/blastest/src/sblat1.c b/blastest/src/sblat1.c index 6996666a55..7bde1b108f 100644 --- a/blastest/src/sblat1.c +++ b/blastest/src/sblat1.c @@ -69,6 +69,11 @@ static real c_b63 = 0.f; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "sblat1"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ static real sfac = 9.765625e-4f; @@ -123,11 +128,11 @@ static real c_b63 = 0.f; combla_1.incy = 9999; if (combla_1.icase == 3 || combla_1.icase == 11) { check0_(&sfac); - } else if (combla_1.icase == 7 || combla_1.icase == 8 || + } else if (combla_1.icase == 7 || combla_1.icase == 8 || combla_1.icase == 9 || combla_1.icase == 10) { check1_(&sfac); - } else if (combla_1.icase == 1 || combla_1.icase == 2 || - combla_1.icase == 5 || combla_1.icase == 6 || combla_1.icase + } else if (combla_1.icase == 1 || combla_1.icase == 2 || + combla_1.icase == 5 || combla_1.icase == 6 || combla_1.icase == 12 || combla_1.icase == 13) { check2_(&sfac); } else if (combla_1.icase == 4) { @@ -142,7 +147,12 @@ static real c_b63 = 0.f; } s_stop("", (ftnlen)0); - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ /* Subroutine */ int header_(void) @@ -202,16 +212,16 @@ static real c_b63 = 0.f; static real dc1[8] = { .6f,.8f,-.6f,.8f,.6f,1.f,0.f,1.f }; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k; real sa, sb, sc, ss, dtemp[9]; - extern /* Subroutine */ int srotg_(real *, real *, real *, real *), + extern /* Subroutine */ int srotg_(real *, real *, real *, real *), stest_(integer *, real *, real *, real *, real *), stest1_(real *, - real *, real *, real *), srotmg_(real *, real *, real *, real *, + real *, real *, real *), srotmg_(real *, real *, real *, real *, real *); /* Fortran I/O blocks */ @@ -322,7 +332,7 @@ static real c_b63 = 0.f; real r__1; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -335,8 +345,8 @@ static real c_b63 = 0.f; real stemp[1]; extern real sasum_(integer *, real *, integer *); real strue[8]; - extern /* Subroutine */ int stest_(integer *, real *, real *, real *, - real *), itest1_(integer *, integer *), stest1_(real *, real *, + extern /* Subroutine */ int stest_(integer *, real *, real *, real *, + real *), itest1_(integer *, integer *), stest1_(real *, real *, real *, real *); extern integer isamax_(integer *, real *, integer *); @@ -378,11 +388,11 @@ static real c_b63 = 0.f; stest1_(&r__1, stemp, stemp, sfac); } else if (combla_1.icase == 9) { /* .. SSCAL .. */ - sscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], + sscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], sx, &combla_1.incx); i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { - strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << + strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L40: */ } @@ -455,87 +465,87 @@ static real c_b63 = 0.f; ; static struct { real e_1[448]; - } equiv_3 = {{ .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, 0.f, + } equiv_3 = {{ .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 3.8f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, - 2.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, -.4f, 0.f, 0.f, 0.f, - 0.f, 0.f, .6f, .1f, -.5f, .8f, 0.f, 0.f, 0.f, -.8f, 3.8f, - -2.2f, -1.2f, 0.f, 0.f, 0.f, -.9f, 2.8f, -1.4f, -1.3f, 0.f, - 0.f, 0.f, 3.5f, -.4f, -2.2f, 4.7f, 0.f, 0.f, 0.f, .6f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, 0.f, 0.f, 0.f, + 2.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, -.4f, 0.f, 0.f, 0.f, + 0.f, 0.f, .6f, .1f, -.5f, .8f, 0.f, 0.f, 0.f, -.8f, 3.8f, + -2.2f, -1.2f, 0.f, 0.f, 0.f, -.9f, 2.8f, -1.4f, -1.3f, 0.f, + 0.f, 0.f, 3.5f, -.4f, -2.2f, 4.7f, 0.f, 0.f, 0.f, .6f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, 0.f, 0.f, 0.f, 0.f, 0.f, .1f, -3.f, 0.f, 0.f, 0.f, 0.f, -.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, 3.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, - -.5f, .8f, .9f, -.3f, -.4f, -2.f, .1f, 1.4f, .8f, .6f, -.3f, - -2.8f, -1.8f, .1f, 1.3f, .8f, 0.f, -.3f, -1.9f, 3.8f, .1f, - -3.1f, .8f, 4.8f, -.3f, -1.5f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, + -.5f, .8f, .9f, -.3f, -.4f, -2.f, .1f, 1.4f, .8f, .6f, -.3f, + -2.8f, -1.8f, .1f, 1.3f, .8f, 0.f, -.3f, -1.9f, 3.8f, .1f, + -3.1f, .8f, 4.8f, -.3f, -1.5f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, 0.f, 0.f, 0.f, 0.f, 4.8f, .1f, -3.f, - 0.f, 0.f, 0.f, 0.f, 3.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 3.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, 2.1f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, .9f, -.3f, -.4f, -1.6f, .1f, -2.2f, .8f, 5.4f, -.3f, -2.8f, -1.5f, - .1f, -1.4f, .8f, 3.6f, -.3f, -1.9f, 3.7f, .1f, -2.2f, .8f, - 3.6f, -.3f, -1.5f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - .6f, .1f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, -1.f, 0.f, 0.f, 0.f, + .1f, -1.4f, .8f, 3.6f, -.3f, -1.9f, 3.7f, .1f, -2.2f, .8f, + 3.6f, -.3f, -1.5f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + .6f, .1f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, -1.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, .8f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, 0.f, 0.f, 0.f, -.8f, - -1.f, 1.4f, -1.6f, 0.f, 0.f, 0.f, -.9f, -.8f, 1.3f, -1.6f, + -1.f, 1.4f, -1.6f, 0.f, 0.f, 0.f, -.9f, -.8f, 1.3f, -1.6f, 0.f, 0.f, 0.f, 3.5f, .8f, -3.1f, 4.8f, 0.f, 0.f, 0.f }}; static struct { real e_1[448]; - } equiv_7 = {{ .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, + } equiv_7 = {{ .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, 0.f, - 0.f, 0.f, 0.f, 0.f, .7f, -4.8f, 0.f, 0.f, 0.f, 0.f, 0.f, - 1.7f, -.7f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 3.5f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, .7f, -4.8f, 0.f, 0.f, 0.f, 0.f, 0.f, + 1.7f, -.7f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, 0.f, 0.f, 0.f, .7f, -4.8f, - 3.f, 1.1f, 0.f, 0.f, 0.f, 1.7f, -.7f, -.7f, 2.3f, 0.f, 0.f, - 0.f, -2.6f, 3.5f, -.7f, -3.6f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, + 3.f, 1.1f, 0.f, 0.f, 0.f, 1.7f, -.7f, -.7f, 2.3f, 0.f, 0.f, + 0.f, -2.6f, 3.5f, -.7f, -3.6f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, 0.f, 0.f, 0.f, 0.f, - 4.f, -.9f, -.3f, 0.f, 0.f, 0.f, 0.f, -.5f, -.9f, 1.5f, 0.f, - 0.f, 0.f, 0.f, -1.5f, -.9f, -1.8f, 0.f, 0.f, 0.f, 0.f, .5f, + 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, 0.f, 0.f, 0.f, 0.f, + 4.f, -.9f, -.3f, 0.f, 0.f, 0.f, 0.f, -.5f, -.9f, 1.5f, 0.f, + 0.f, 0.f, 0.f, -1.5f, -.9f, -1.8f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, -.6f, .2f, .8f, 3.7f, -.9f, -1.2f, .7f, -1.5f, - .2f, 2.2f, -.3f, -.9f, 2.1f, .7f, -1.6f, .2f, 2.f, -1.6f, - -.9f, -2.1f, .7f, 2.9f, .2f, -3.8f, .5f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, + .2f, 2.2f, -.3f, -.9f, 2.1f, .7f, -1.6f, .2f, 2.f, -1.6f, + -.9f, -2.1f, .7f, 2.9f, .2f, -3.8f, .5f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, .5f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 4.f, + 0.f, 0.f, 0.f, .5f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 4.f, -6.3f, 0.f, 0.f, 0.f, 0.f, 0.f, -.5f, .3f, 0.f, 0.f, 0.f, 0.f, - 0.f, -1.5f, 3.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, - .7f, 0.f, 0.f, 0.f, 3.7f, -7.2f, 3.f, 1.7f, 0.f, 0.f, 0.f, - -.3f, .9f, -.7f, 1.9f, 0.f, 0.f, 0.f, -1.6f, 2.7f, -.7f, - -3.4f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, -1.5f, 3.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, + .7f, 0.f, 0.f, 0.f, 3.7f, -7.2f, 3.f, 1.7f, 0.f, 0.f, 0.f, + -.3f, .9f, -.7f, 1.9f, 0.f, 0.f, 0.f, -1.6f, 2.7f, -.7f, + -3.4f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, 0.f, 0.f, 0.f, 0.f, .7f, -.9f, 1.2f, 0.f, 0.f, - 0.f, 0.f, 1.7f, -.9f, .5f, 0.f, 0.f, 0.f, 0.f, -2.6f, -.9f, - -1.3f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, -.6f, .2f, - .8f, .7f, -.9f, 1.2f, .7f, -1.5f, .2f, 1.6f, 1.7f, -.9f, .5f, - .7f, -1.6f, .2f, 2.4f, -2.6f, -.9f, -1.3f, .7f, 2.9f, .2f, + 0.f, 0.f, 1.7f, -.9f, .5f, 0.f, 0.f, 0.f, 0.f, -2.6f, -.9f, + -1.3f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, -.6f, .2f, + .8f, .7f, -.9f, 1.2f, .7f, -1.5f, .2f, 1.6f, 1.7f, -.9f, .5f, + .7f, -1.6f, .2f, 2.4f, -2.6f, -.9f, -1.3f, .7f, 2.9f, .2f, -4.f }}; @@ -544,7 +554,7 @@ static real c_b63 = 0.f; real r__1; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -568,13 +578,13 @@ static real c_b63 = 0.f; #define dt19yd ((real *)&equiv_7 + 336) integer ksize; real ssize[7]; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ), stest_(integer *, real *, real *, real *, real *), saxpy_( integer *, real *, real *, integer *, real *, integer *), srotm_( integer *, real *, integer *, real *, integer *, real *), stest1_( real *, real *, real *, real *); - extern real sdsdot_(integer *, real *, real *, integer *, real *, integer + extern real sdsdot_(integer *, real *, real *, integer *, real *, integer *); /* Fortran I/O blocks */ @@ -627,7 +637,7 @@ static real c_b63 = 0.f; /* .. SDOT .. */ r__1 = sdot_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy); - stest1_(&r__1, &dt7[kn + (ki << 2) - 5], &ssize1[kn - 1], + stest1_(&r__1, &dt7[kn + (ki << 2) - 5], &ssize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. SAXPY .. */ @@ -664,9 +674,9 @@ static real c_b63 = 0.f; for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; - stx[i__ - 1] = dt19x[i__ + (kpar + (kni << 2)) * 7 - + stx[i__ - 1] = dt19x[i__ + (kpar + (kni << 2)) * 7 - 36]; - sty[i__ - 1] = dt19y[i__ + (kpar + (kni << 2)) * 7 - + sty[i__ - 1] = dt19y[i__ + (kpar + (kni << 2)) * 7 - 36]; } @@ -696,7 +706,7 @@ static real c_b63 = 0.f; /* .. SDSROT .. */ r__1 = sdsdot_(&combla_1.n, &c_b39, sx, &combla_1.incx, sy, & combla_1.incy); - stest1_(&r__1, &st7b[kn + (ki << 2) - 5], &ssize3[kn - 1], + stest1_(&r__1, &st7b[kn + (ki << 2) - 5], &ssize3[kn - 1], sfac); } else { s_wsle(&io___80); @@ -759,7 +769,7 @@ static real c_b63 = 0.f; 1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f }; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -770,12 +780,12 @@ static real c_b63 = 0.f; real mwpc[11]; integer mwpn[11]; real mwps[11]; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); real mwpx[5], mwpy[5]; integer ksize; real copyx[5], copyy[5]; - extern /* Subroutine */ int stest_(integer *, real *, real *, real *, + extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); real mwptx[55] /* was [11][5] */, mwpty[55] /* was [11][5] */; integer mwpinx[11], mwpiny[11]; @@ -1032,7 +1042,7 @@ static real c_b63 = 0.f; sfac) { real scomp[1], strue[1]; - extern /* Subroutine */ int stest_(integer *, real *, real *, real *, + extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); /* ************************* STEST1 ***************************** */ diff --git a/blastest/src/sblat2.c b/blastest/src/sblat2.c index 54d0a010af..a2ce310f65 100644 --- a/blastest/src/sblat2.c +++ b/blastest/src/sblat2.c @@ -155,10 +155,15 @@ static logical c_false = FALSE_; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "sblat2"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ - static char snames[6*16] = "SGEMV " "SGBMV " "SSYMV " "SSBMV " "SSPMV " - "STRMV " "STBMV " "STPMV " "STRSV " "STBSV " "STPSV " "SGER " + static char snames[6*16] = "SGEMV " "SGBMV " "SSYMV " "SSBMV " "SSPMV " + "STRMV " "STBMV " "STPMV " "STRSV " "STBSV " "STPSV " "SGER " "SSYR " "SSPR " "SSYR2 " "SSPR2 "; /* Format strings */ @@ -204,10 +209,10 @@ static logical c_false = FALSE_; cllist cl__1; /* Builtin functions */ - integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, - char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), - s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, + char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), + s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); @@ -228,40 +233,40 @@ static logical c_false = FALSE_; integer ninc, nbet, ntra; logical rewi; integer nout; - extern /* Subroutine */ int schk1_(char *, real *, real *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, integer *, integer *, real *, integer *, real *, + extern /* Subroutine */ int schk1_(char *, real *, real *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real * - , real *, real *, real *, real *, real *, real *, real *, real *, - ftnlen), schk2_(char *, real *, real *, integer *, integer *, - logical *, logical *, logical *, integer *, integer *, integer *, - integer *, integer *, real *, integer *, real *, integer *, - integer *, integer *, integer *, real *, real *, real *, real *, - real *, real *, real *, real *, real *, real *, real *, ftnlen), - schk3_(char *, real *, real *, integer *, integer *, logical *, - logical *, logical *, integer *, integer *, integer *, integer *, + , real *, real *, real *, real *, real *, real *, real *, real *, + ftnlen), schk2_(char *, real *, real *, integer *, integer *, + logical *, logical *, logical *, integer *, integer *, integer *, + integer *, integer *, real *, integer *, real *, integer *, + integer *, integer *, integer *, real *, real *, real *, real *, + real *, real *, real *, real *, real *, real *, real *, ftnlen), + schk3_(char *, real *, real *, integer *, integer *, logical *, + logical *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, real *, real *, real * , real *, real *, real *, real *, real *, real *, ftnlen), schk4_( char *, real *, real *, integer *, integer *, logical *, logical * - , logical *, integer *, integer *, integer *, real *, integer *, - integer *, integer *, integer *, real *, real *, real *, real *, - real *, real *, real *, real *, real *, real *, real *, real *, - ftnlen), schk5_(char *, real *, real *, integer *, integer *, - logical *, logical *, logical *, integer *, integer *, integer *, + , logical *, integer *, integer *, integer *, real *, integer *, + integer *, integer *, integer *, real *, real *, real *, real *, + real *, real *, real *, real *, real *, real *, real *, real *, + ftnlen), schk5_(char *, real *, real *, integer *, integer *, + logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, integer *, integer *, integer *, real *, real * - , real *, real *, real *, real *, real *, real *, real *, real *, + , real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk6_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, - integer *, real *, integer *, integer *, integer *, integer *, - real *, real *, real *, real *, real *, real *, real *, real *, + integer *, real *, integer *, integer *, integer *, integer *, + real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen); logical fatal; extern /* Subroutine */ int schke_(integer *, char *, integer *, ftnlen); logical trace; integer nidim; - extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *, - real *, real *, real *, real *, real *, logical *, integer *, + extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; @@ -610,7 +615,7 @@ static logical c_false = FALSE_; goto L80; } for (i__ = 1; i__ <= 16; ++i__) { - if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) + if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } @@ -737,44 +742,44 @@ static logical c_false = FALSE_; /* Test SGEMV, 01, and SGBMV, 02. */ L140: schk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, - &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, + &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test SSYMV, 03, SSBMV, 04, and SSPMV, 05. */ L150: schk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, - &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, + &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test STRMV, 06, STBMV, 07, STPMV, 08, */ /* STRSV, 09, STBSV, 10, and STPSV, 11. */ L160: schk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test SGER, 12. */ L170: schk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test SSYR, 13, and SSPR, 14. */ L180: schk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test SSYR2, 15, and SSPR2, 16. */ L190: schk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: @@ -816,15 +821,20 @@ static logical c_false = FALSE_; /* End of SBLAT2. */ - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ /* Subroutine */ int schk1_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * nalf, real *alf, integer *nbet, real *bet, integer *ninc, integer * - inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, - real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, + inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, + real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, ftnlen sname_len) { /* Initialized data */ @@ -872,24 +882,24 @@ static logical c_false = FALSE_; logical full, tran, null; real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, integer *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int sgbmv_(char *, integer *, integer *, integer * , integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *, ftnlen), smvch_(char *, integer *, integer *, - real *, real *, integer *, real *, integer *, real *, real *, - integer *, real *, real *, real *, real *, real *, logical *, + real *, integer *, ftnlen), smvch_(char *, integer *, integer *, + real *, real *, integer *, real *, integer *, real *, real *, + integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen), sgemv_(char *, integer *, integer * - , real *, real *, integer *, real *, integer *, real *, real *, + , real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); logical reset; integer incxs, incys; char trans[1]; logical banded; real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; char transs[1]; @@ -1066,9 +1076,9 @@ static logical c_false = FALSE_; transl = 0.f; i__7 = abs(incy); i__8 = ml - 1; - smake_("GE", " ", " ", &c__1, &ml, &y[1], + smake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & - i__8, &reset, &transl, (ftnlen)2, + i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; @@ -1076,7 +1086,7 @@ static logical c_false = FALSE_; /* Save every datum before calling the */ /* subroutine. */ - *(unsigned char *)transs = *(unsigned + *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; @@ -1134,7 +1144,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - sgemv_(trans, &m, &n, &alpha, &aa[1], + sgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { @@ -1259,8 +1269,8 @@ static logical c_false = FALSE_; smvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, - &beta, &y[1], &incy, &yt[1], - &g[1], &yy[1], eps, &err, + &beta, &y[1], &incy, &yt[1], + &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); @@ -1365,11 +1375,11 @@ static logical c_false = FALSE_; } /* schk1_ */ /* Subroutine */ int schk2_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * nalf, real *alf, integer *nbet, real *bet, integer *ninc, integer * - inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, - real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, + inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, + real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, ftnlen sname_len) { /* Initialized data */ @@ -1407,7 +1417,7 @@ static logical c_false = FALSE_; f_rew(alist *); /* Local variables */ - integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, + integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; real als, bls; extern logical lse_(real *, real *, integer *); @@ -1419,27 +1429,27 @@ static logical c_false = FALSE_; char uplo[1]; real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, integer *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *, - real *, real *, real *, real *, real *, logical *, integer *, + extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs, incys; - extern /* Subroutine */ int ssbmv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *, + extern /* Subroutine */ int ssbmv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); char uplos[1]; - extern /* Subroutine */ int sspmv_(char *, integer *, real *, real *, + extern /* Subroutine */ int sspmv_(char *, integer *, real *, real *, real *, integer *, real *, real *, integer *, ftnlen), ssymv_( - char *, integer *, real *, real *, integer *, real *, integer *, + char *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); logical banded, packed; real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; @@ -1599,7 +1609,7 @@ static logical c_false = FALSE_; i__8 = n - 1; smake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & - reset, &transl, (ftnlen)2, (ftnlen)1, + reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; @@ -1816,8 +1826,8 @@ static logical c_false = FALSE_; /* Check the result. */ - smvch_("N", &n, &n, &alpha, &a[a_offset], - nmax, &x[1], &incx, &beta, &y[1], + smvch_("N", &n, &n, &alpha, &a[a_offset], + nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); @@ -1928,10 +1938,10 @@ static logical c_false = FALSE_; } /* schk2_ */ /* Subroutine */ int schk3_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, - real *as, real *x, real *xx, real *xs, real *xt, real *g, real *z__, + real *as, real *x, real *xx, real *xs, real *xt, real *g, real *z__, ftnlen sname_len) { /* Initialized data */ @@ -1971,7 +1981,7 @@ static logical c_false = FALSE_; integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ - integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda, ict, + integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda, ict, icu; extern logical lse_(real *, real *, integer *); real err; @@ -1982,32 +1992,32 @@ static logical c_false = FALSE_; logical full, null; char uplo[1], diags[1]; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, integer *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *, - real *, real *, real *, real *, real *, logical *, integer *, + extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs; char trans[1]; - extern /* Subroutine */ int stbmv_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, - ftnlen), stbsv_(char *, char *, char *, integer *, integer *, + extern /* Subroutine */ int stbmv_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, + ftnlen), stbsv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; - extern /* Subroutine */ int stpmv_(char *, char *, char *, integer *, + extern /* Subroutine */ int stpmv_(char *, char *, char *, integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), strmv_(char *, - char *, char *, integer *, real *, integer *, real *, integer *, + char *, char *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), stpsv_(char *, char *, char *, integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), strsv_(char * , char *, char *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; char transs[1]; @@ -2133,13 +2143,13 @@ static logical c_false = FALSE_; ; for (icd = 1; icd <= 2; ++icd) { - *(unsigned char *)diag = *(unsigned char *)&ichd[icd + *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl = 0.f; - smake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], + smake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2186,7 +2196,7 @@ static logical c_false = FALSE_; /* Call the subroutine. */ - if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) + if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { @@ -2239,7 +2249,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - stbmv_(uplo, trans, diag, &n, &k, &aa[1], + stbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { @@ -2320,7 +2330,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - stbsv_(uplo, trans, diag, &n, &k, &aa[1], + stbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { @@ -2362,11 +2372,11 @@ static logical c_false = FALSE_; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplo == *(unsigned + isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; - isame[1] = *(unsigned char *)trans == *(unsigned + isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; - isame[2] = *(unsigned char *)diag == *(unsigned + isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { @@ -2437,7 +2447,7 @@ static logical c_false = FALSE_; smvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &x[1], &incx, & c_b120, &z__[1], &incx, &xt[1], & - g[1], &xx[1], eps, &err, fatal, + g[1], &xx[1], eps, &err, fatal, nout, &c_true, (ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { @@ -2446,7 +2456,7 @@ static logical c_false = FALSE_; i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { - z__[i__] = xx[(i__ - 1) * abs(incx) + + z__[i__] = xx[(i__ - 1) * abs(incx) + 1]; xx[(i__ - 1) * abs(incx) + 1] = x[i__] ; @@ -2455,7 +2465,7 @@ static logical c_false = FALSE_; smvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &z__[1], &incx, & c_b120, &x[1], &incx, &xt[1], &g[ - 1], &xx[1], eps, &err, fatal, + 1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); @@ -2558,10 +2568,10 @@ static logical c_false = FALSE_; } /* schk3_ */ /* Subroutine */ int schk4_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, - real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, + real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Format strings */ @@ -2599,24 +2609,24 @@ static logical c_false = FALSE_; real err; integer ldas; logical same; - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *); integer incx, incy; logical null; real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, integer *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *, - real *, real *, real *, real *, real *, logical *, integer *, + extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs, incys; real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; @@ -2718,7 +2728,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = m - 1; smake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { x[m / 2] = 0.f; @@ -2752,7 +2762,7 @@ static logical c_false = FALSE_; transl = 0.f; i__5 = m - 1; i__6 = n - 1; - smake_(sname + 1, " ", " ", &m, &n, &a[a_offset], + smake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2883,9 +2893,9 @@ static logical c_false = FALSE_; } else { w[0] = y[n - j + 1]; } - smvch_("N", &m, &c__1, &alpha, &z__[1], nmax, + smvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b128, &a[j * a_dim1 + 1], - &c__1, &yt[1], &g[1], &aa[(j - 1) * + &c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); @@ -2966,10 +2976,10 @@ static logical c_false = FALSE_; } /* schk4_ */ /* Subroutine */ int schk5_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, - real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, + real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Initialized data */ @@ -3017,18 +3027,18 @@ static logical c_false = FALSE_; integer incx; logical full, null; char uplo[1]; - extern /* Subroutine */ int sspr_(char *, integer *, real *, real *, - integer *, real *, ftnlen), ssyr_(char *, integer *, real *, real + extern /* Subroutine */ int sspr_(char *, integer *, real *, real *, + integer *, real *, ftnlen), ssyr_(char *, integer *, real *, real *, integer *, real *, integer *, ftnlen); real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, integer *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *, - real *, real *, real *, real *, real *, logical *, integer *, + extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs; @@ -3036,7 +3046,7 @@ static logical c_false = FALSE_; char uplos[1]; logical packed; real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; @@ -3140,7 +3150,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; @@ -3309,9 +3319,9 @@ static logical c_false = FALSE_; jj = j; lj = n - j + 1; } - smvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, + smvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b128, &a[jj + j * a_dim1], & - c__1, &yt[1], &g[1], &aa[ja], eps, &err, + c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { @@ -3410,10 +3420,10 @@ static logical c_false = FALSE_; } /* schk5_ */ /* Subroutine */ int schk6_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, - real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, + real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Initialized data */ @@ -3442,7 +3452,7 @@ static logical c_false = FALSE_; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; @@ -3462,19 +3472,19 @@ static logical c_false = FALSE_; integer incx, incy; logical full, null; char uplo[1]; - extern /* Subroutine */ int sspr2_(char *, integer *, real *, real *, - integer *, real *, integer *, real *, ftnlen), ssyr2_(char *, - integer *, real *, real *, integer *, real *, integer *, real *, + extern /* Subroutine */ int sspr2_(char *, integer *, real *, real *, + integer *, real *, integer *, real *, ftnlen), ssyr2_(char *, + integer *, real *, real *, integer *, real *, integer *, real *, integer *, ftnlen); real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, integer *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *, - real *, real *, real *, real *, real *, logical *, integer *, + extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs, incys; @@ -3482,7 +3492,7 @@ static logical c_false = FALSE_; char uplos[1]; logical packed; real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; @@ -3588,7 +3598,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; @@ -3623,7 +3633,7 @@ static logical c_false = FALSE_; transl = 0.f; i__5 = n - 1; i__6 = n - 1; - smake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], + smake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -3801,7 +3811,7 @@ static logical c_false = FALSE_; jj = j; lj = n - j + 1; } - smvch_("N", &lj, &c__2, &alpha, &z__[jj + + smvch_("N", &lj, &c__2, &alpha, &z__[jj + z_dim1], nmax, w, &c__1, &c_b128, &a[ jj + j * a_dim1], &c__1, &yt[1], &g[1] , &aa[ja], eps, &err, fatal, nout, & @@ -3907,7 +3917,7 @@ static logical c_false = FALSE_; } /* schk6_ */ -/* Subroutine */ int schke_(integer *isnum, char *srnamt, integer *nout, +/* Subroutine */ int schke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ @@ -3921,35 +3931,35 @@ static logical c_false = FALSE_; /* Local variables */ real a[1] /* was [1][1] */, x[1], y[1], beta; - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *), sspr_(char *, - integer *, real *, real *, integer *, real *, ftnlen), ssyr_(char - *, integer *, real *, real *, integer *, real *, integer *, - ftnlen), sspr2_(char *, integer *, real *, real *, integer *, - real *, integer *, real *, ftnlen), ssyr2_(char *, integer *, - real *, real *, integer *, real *, integer *, real *, integer *, + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *), sspr_(char *, + integer *, real *, real *, integer *, real *, ftnlen), ssyr_(char + *, integer *, real *, real *, integer *, real *, integer *, + ftnlen), sspr2_(char *, integer *, real *, real *, integer *, + real *, integer *, real *, ftnlen), ssyr2_(char *, integer *, + real *, real *, integer *, real *, integer *, real *, integer *, ftnlen); real alpha; extern /* Subroutine */ int sgbmv_(char *, integer *, integer *, integer * , integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *, ftnlen), sgemv_(char *, integer *, integer *, - real *, real *, integer *, real *, integer *, real *, real *, - integer *, ftnlen), ssbmv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *, - ftnlen), stbmv_(char *, char *, char *, integer *, integer *, - real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), - stbsv_(char *, char *, char *, integer *, integer *, real *, + real *, integer *, ftnlen), sgemv_(char *, integer *, integer *, + real *, real *, integer *, real *, integer *, real *, real *, + integer *, ftnlen), ssbmv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *, + ftnlen), stbmv_(char *, char *, char *, integer *, integer *, + real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), + stbsv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), sspmv_( - char *, integer *, real *, real *, real *, integer *, real *, - real *, integer *, ftnlen), stpmv_(char *, char *, char *, - integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), - strmv_(char *, char *, char *, integer *, real *, integer *, real - *, integer *, ftnlen, ftnlen, ftnlen), stpsv_(char *, char *, - char *, integer *, real *, real *, integer *, ftnlen, ftnlen, - ftnlen), ssymv_(char *, integer *, real *, real *, integer *, + char *, integer *, real *, real *, real *, integer *, real *, + real *, integer *, ftnlen), stpmv_(char *, char *, char *, + integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), + strmv_(char *, char *, char *, integer *, real *, integer *, real + *, integer *, ftnlen, ftnlen, ftnlen), stpsv_(char *, char *, + char *, integer *, real *, real *, integer *, ftnlen, ftnlen, + ftnlen), ssymv_(char *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen), strsv_( - char *, char *, char *, integer *, real *, integer *, real *, - integer *, ftnlen, ftnlen, ftnlen), chkxer_(char *, integer *, + char *, char *, char *, integer *, real *, integer *, real *, + integer *, ftnlen, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ @@ -4455,9 +4465,9 @@ static logical c_false = FALSE_; } /* schke_ */ -/* Subroutine */ int smake_(char *type__, char *uplo, char *diag, integer *m, +/* Subroutine */ int smake_(char *type__, char *uplo, char *diag, integer *m, integer *n, real *a, integer *nmax, real *aa, integer *lda, integer * - kl, integer *ku, logical *reset, real *transl, ftnlen type_len, + kl, integer *ku, logical *reset, real *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ @@ -4516,7 +4526,7 @@ static logical c_false = FALSE_; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { - if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) + if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { a[i__ + j * a_dim1] = sbeg_(reset) + *transl; } else { @@ -4690,9 +4700,9 @@ static logical c_false = FALSE_; } /* smake_ */ -/* Subroutine */ int smvch_(char *trans, integer *m, integer *n, real *alpha, - real *a, integer *nmax, real *x, integer *incx, real *beta, real *y, - integer *incy, real *yt, real *g, real *yy, real *eps, real *err, +/* Subroutine */ int smvch_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *nmax, real *x, integer *incx, real *beta, real *y, + integer *incy, real *yt, real *g, real *yy, real *eps, real *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ @@ -4807,7 +4817,7 @@ static logical c_false = FALSE_; *err = 0.f; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { - erri = (r__1 = yt[i__] - yy[(i__ - 1) * abs(*incy) + 1], abs(r__1)) / + erri = (r__1 = yt[i__] - yy[(i__ - 1) * abs(*incy) + 1], abs(r__1)) / *eps; if (g[i__] != 0.f) { erri /= g[i__]; @@ -4903,7 +4913,7 @@ logical lse_(real *ri, real *rj, integer *lr) } /* lse_ */ -logical lseres_(char *type__, char *uplo, integer *m, integer *n, real *aa, +logical lseres_(char *type__, char *uplo, integer *m, integer *n, real *aa, real *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ @@ -5064,7 +5074,7 @@ real sdiff_(real *x, real *y) } /* sdiff_ */ -/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, +/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ diff --git a/blastest/src/sblat3.c b/blastest/src/sblat3.c index dc5ef5738b..01d4ca4b8b 100644 --- a/blastest/src/sblat3.c +++ b/blastest/src/sblat3.c @@ -135,9 +135,14 @@ static integer c__2 = 2; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "sblat3"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ - static char snames[6*6] = "SGEMM " "SSYMM " "STRMM " "STRSM " "SSYRK " + static char snames[6*6] = "SGEMM " "SSYMM " "STRMM " "STRSM " "SSYRK " "SSYR2K"; /* Format strings */ @@ -179,10 +184,10 @@ static integer c__2 = 2; cllist cl__1; /* Builtin functions */ - integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, - char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), - s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, + char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), + s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); @@ -200,33 +205,33 @@ static integer c__2 = 2; integer nbet, ntra; logical rewi; integer nout; - extern /* Subroutine */ int schk1_(char *, real *, real *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, real *, integer *, real *, integer *, real *, real *, - real *, real *, real *, real *, real *, real *, real *, real *, - real *, ftnlen), schk2_(char *, real *, real *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, real *, integer *, real *, integer *, real *, real *, - real *, real *, real *, real *, real *, real *, real *, real *, - real *, ftnlen), schk3_(char *, real *, real *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, real *, integer *, real *, real *, real *, real *, - real *, real *, real *, real *, real *, ftnlen), schk4_(char *, - real *, real *, integer *, integer *, logical *, logical *, - logical *, integer *, integer *, integer *, real *, integer *, + extern /* Subroutine */ int schk1_(char *, real *, real *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, real *, + real *, real *, real *, real *, real *, real *, real *, real *, + real *, ftnlen), schk2_(char *, real *, real *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, real *, + real *, real *, real *, real *, real *, real *, real *, real *, + real *, ftnlen), schk3_(char *, real *, real *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, real *, integer *, real *, real *, real *, real *, + real *, real *, real *, real *, real *, ftnlen), schk4_(char *, + real *, real *, integer *, integer *, logical *, logical *, + logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, - real *, real *, real *, real *, real *, ftnlen), schk5_(char *, - real *, real *, integer *, integer *, logical *, logical *, - logical *, integer *, integer *, integer *, real *, integer *, + real *, real *, real *, real *, real *, ftnlen), schk5_(char *, + real *, real *, integer *, integer *, logical *, logical *, + logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen); logical fatal; extern /* Subroutine */ int schke_(integer *, char *, integer *, ftnlen); logical trace; integer nidim; - extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *, real *, real *, real *, integer *, real *, + extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); char snaps[32]; integer isnum; @@ -496,7 +501,7 @@ static integer c__2 = 2; goto L60; } for (i__ = 1; i__ <= 6; ++i__) { - if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) + if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } @@ -662,34 +667,34 @@ static integer c__2 = 2; /* Test SGEMM, 01. */ L140: schk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test SSYMM, 02. */ L150: schk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test STRMM, 03, STRSM, 04. */ L160: schk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test SSYRK, 05. */ L170: schk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test SSYR2K, 06. */ L180: schk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; @@ -733,14 +738,19 @@ static integer c__2 = 2; /* End of SBLAT3. */ - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ /* Subroutine */ int schk1_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * - nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, - real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, + nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, + real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, ftnlen sname_len) { /* Initialized data */ @@ -764,7 +774,7 @@ static integer c__2 = 2; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; @@ -773,7 +783,7 @@ static integer c__2 = 2; f_rew(alist *); /* Local variables */ - integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, + integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; real als, bls; extern logical lse_(real *, real *, integer *); @@ -782,22 +792,22 @@ static integer c__2 = 2; logical same, null; real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen); logical trana, tranb; - extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *, real *, real *, real *, integer *, real *, + extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen), sgemm_( - char *, char *, integer *, integer *, integer *, real *, real *, - integer *, real *, integer *, real *, real *, integer *, ftnlen, + char *, char *, integer *, integer *, integer *, real *, real *, + integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); integer nargs; logical reset; char tranas[1], tranbs[1], transa[1], transb[1]; real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -888,7 +898,7 @@ static integer c__2 = 2; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; - trana = *(unsigned char *)transa == 'T' || *(unsigned + trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { @@ -916,9 +926,9 @@ static integer c__2 = 2; ftnlen)1); for (icb = 1; icb <= 3; ++icb) { - *(unsigned char *)transb = *(unsigned char *)&ich[icb + *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; - tranb = *(unsigned char *)transb == 'T' || *(unsigned + tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { @@ -1090,9 +1100,9 @@ static integer c__2 = 2; smmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], - nmax, &beta, &c__[c_offset], + nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, - eps, &err, fatal, nout, &c_true, + eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ @@ -1174,10 +1184,10 @@ static integer c__2 = 2; } /* schk1_ */ /* Subroutine */ int schk2_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * - nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, - real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, + nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, + real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, ftnlen sname_len) { /* Initialized data */ @@ -1202,7 +1212,7 @@ static integer c__2 = 2; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; @@ -1211,7 +1221,7 @@ static integer c__2 = 2; f_rew(alist *); /* Local variables */ - integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, + integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc, ics; real als, bls; integer icu; @@ -1224,22 +1234,22 @@ static integer c__2 = 2; char uplo[1]; real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen); char sides[1]; - extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *, real *, real *, real *, integer *, real *, + extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char uplos[1]; - extern /* Subroutine */ int ssymm_(char *, char *, integer *, integer *, - real *, real *, integer *, real *, integer *, real *, real *, + extern /* Subroutine */ int ssymm_(char *, char *, integer *, integer *, + real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -1378,7 +1388,7 @@ static integer c__2 = 2; /* Generate the matrix C. */ - smake_("GE", " ", " ", &m, &n, &c__[c_offset], + smake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -1459,9 +1469,9 @@ static integer c__2 = 2; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)sides == *(unsigned + isame[0] = *(unsigned char *)sides == *(unsigned char *)side; - isame[1] = *(unsigned char *)uplos == *(unsigned + isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; @@ -1506,14 +1516,14 @@ static integer c__2 = 2; if (left) { smmch_("N", "N", &m, &n, &m, &alpha, &a[ - a_offset], nmax, &b[b_offset], + a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { smmch_("N", "N", &m, &n, &n, &alpha, &b[ - b_offset], nmax, &a[a_offset], + b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( @@ -1594,7 +1604,7 @@ static integer c__2 = 2; } /* schk2_ */ /* Subroutine */ int schk3_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nmax, real *a, real *aa, real *as, real *b, real *bb, real *bs, real * ct, real *g, real *c__, ftnlen sname_len) @@ -1623,7 +1633,7 @@ static integer c__2 = 2; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; @@ -1647,25 +1657,25 @@ static integer c__2 = 2; real alpha; char diags[1]; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen); char sides[1]; - extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *, real *, real *, real *, integer *, real *, + extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char uplos[1]; - extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * - , ftnlen, ftnlen, ftnlen, ftnlen), strsm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, + , ftnlen, ftnlen, ftnlen, ftnlen), strsm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char tranas[1], transa[1]; real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -1800,7 +1810,7 @@ static integer c__2 = 2; /* Generate the matrix B. */ - smake_("GE", " ", " ", &m, &n, &b[b_offset], + smake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -1866,7 +1876,7 @@ static integer c__2 = 2; } strmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, - (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { @@ -1899,7 +1909,7 @@ static integer c__2 = 2; } strsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, - (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } @@ -1968,18 +1978,18 @@ static integer c__2 = 2; smmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & - c_b84, &c__[c_offset], + c_b84, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { smmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & - c_b84, &c__[c_offset], + c_b84, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } @@ -1992,10 +2002,10 @@ static integer c__2 = 2; i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; - for (i__ = 1; i__ <= i__5; ++i__) + for (i__ = 1; i__ <= i__5; ++i__) { c__[i__ + j * c_dim1] = bb[i__ + (j - 1) * ldb]; - bb[i__ + (j - 1) * ldb] = alpha * b[i__ + j * + bb[i__ + (j - 1) * ldb] = alpha * b[i__ + j * b_dim1]; /* L60: */ } @@ -2008,16 +2018,16 @@ static integer c__2 = 2; &c__[c_offset], nmax, & c_b84, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & - ldb, eps, &err, fatal, + ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { smmch_("N", transa, &m, &n, &n, & - c_b94, &c__[c_offset], - nmax, &a[a_offset], nmax, - &c_b84, &b[b_offset], + c_b94, &c__[c_offset], + nmax, &a[a_offset], nmax, + &c_b84, &b[b_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_false, ( ftnlen)1, (ftnlen)1); } @@ -2099,10 +2109,10 @@ static integer c__2 = 2; } /* schk3_ */ /* Subroutine */ int schk4_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * - nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, - real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, + nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, + real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, ftnlen sname_len) { /* Initialized data */ @@ -2129,7 +2139,7 @@ static integer c__2 = 2; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; @@ -2151,22 +2161,22 @@ static integer c__2 = 2; char uplo[1]; real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * - , ftnlen, ftnlen, ftnlen), smmch_(char *, char *, integer *, + , ftnlen, ftnlen, ftnlen), smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer * - , real *, real *, integer *, real *, real *, real *, integer *, + , real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char trans[1]; logical upper; char uplos[1]; - extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, - real *, real *, integer *, real *, real *, integer *, ftnlen, + extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, + real *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); char transs[1]; @@ -2293,7 +2303,7 @@ static integer c__2 = 2; /* Generate the matrix C. */ - smake_("SY", uplo, " ", &n, &n, &c__[c_offset], + smake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2350,7 +2360,7 @@ static integer c__2 = 2; al__1.aunit = *ntra; f_rew(&al__1); } - ssyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, + ssyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, &beta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1) ; @@ -2366,9 +2376,9 @@ static integer c__2 = 2; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplos == *(unsigned + isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; - isame[1] = *(unsigned char *)transs == *(unsigned + isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; @@ -2421,19 +2431,19 @@ static integer c__2 = 2; } if (tran) { smmch_("T", "N", &lj, &c__1, &k, & - alpha, &a[jj * a_dim1 + 1], - nmax, &a[j * a_dim1 + 1], - nmax, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + alpha, &a[jj * a_dim1 + 1], + nmax, &a[j * a_dim1 + 1], + nmax, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { smmch_("N", "T", &lj, &c__1, &k, & - alpha, &a[jj + a_dim1], nmax, + alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & - ct[1], &g[1], &cc[jc], &ldc, + ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } @@ -2526,7 +2536,7 @@ static integer c__2 = 2; } /* schk4_ */ /* Subroutine */ int schk5_(char *sname, real *eps, real *thresh, integer * - nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, + nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nbet, real *bet, integer *nmax, real *ab, real *aa, real *as, real * bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, real * @@ -2579,22 +2589,22 @@ static integer c__2 = 2; char uplo[1]; real alpha; logical isame[13]; - extern /* Subroutine */ int smake_(char *, char *, char *, integer *, + extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * - , ftnlen, ftnlen, ftnlen), smmch_(char *, char *, integer *, + , ftnlen, ftnlen, ftnlen), smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer * - , real *, real *, integer *, real *, real *, real *, integer *, + , real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char trans[1]; logical upper; char uplos[1]; - extern /* Subroutine */ int ssyr2k_(char *, char *, integer *, integer *, - real *, real *, integer *, real *, integer *, real *, real *, + extern /* Subroutine */ int ssyr2k_(char *, char *, integer *, integer *, + real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); real errmax; - extern logical lseres_(char *, char *, integer *, integer *, real *, real + extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); char transs[1]; @@ -2740,7 +2750,7 @@ static integer c__2 = 2; /* Generate the matrix C. */ - smake_("SY", uplo, " ", &n, &n, &c__[c_offset], + smake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2821,9 +2831,9 @@ static integer c__2 = 2; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplos == *(unsigned + isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; - isame[1] = *(unsigned char *)transs == *(unsigned + isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; @@ -2880,7 +2890,7 @@ static integer c__2 = 2; if (tran) { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { - w[i__] = ab[(j - 1 << 1) * *nmax + w[i__] = ab[(j - 1 << 1) * *nmax + k + i__]; w[k + i__] = ab[(j - 1 << 1) * * nmax + i__]; @@ -2891,17 +2901,17 @@ static integer c__2 = 2; i__8 = *nmax << 1; smmch_("T", "N", &lj, &c__1, &i__6, & alpha, &ab[jjab], &i__7, &w[1] - , &i__8, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + , &i__8, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { - w[i__] = ab[(k + i__ - 1) * *nmax + w[i__] = ab[(k + i__ - 1) * *nmax + j]; - w[k + i__] = ab[(i__ - 1) * *nmax + w[k + i__] = ab[(i__ - 1) * *nmax + j]; /* L60: */ } @@ -2909,9 +2919,9 @@ static integer c__2 = 2; i__7 = *nmax << 1; smmch_("N", "N", &lj, &c__1, &i__6, & alpha, &ab[jj], nmax, &w[1], & - i__7, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + i__7, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } @@ -3007,7 +3017,7 @@ static integer c__2 = 2; } /* schk5_ */ -/* Subroutine */ int schke_(integer *isnum, char *srnamt, integer *nout, +/* Subroutine */ int schke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ @@ -3020,22 +3030,22 @@ static integer c__2 = 2; integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ - real a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* + real a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), strmm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, + char *, integer *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ssymm_(char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer - *, real *, real *, integer *, ftnlen, ftnlen), strsm_(char *, - char *, char *, char *, integer *, integer *, real *, real *, - integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), - ssyrk_(char *, char *, integer *, integer *, real *, real *, + integer *, integer *, real *, real *, integer *, real *, integer + *, real *, real *, integer *, ftnlen, ftnlen), strsm_(char *, + char *, char *, char *, integer *, integer *, real *, real *, + integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), + ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), ssyr2k_( - char *, char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *, ftnlen, ftnlen), - chkxer_(char *, integer *, integer *, logical *, logical *, + char *, char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *, ftnlen, ftnlen), + chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ @@ -3089,142 +3099,142 @@ static integer c__2 = 2; } L10: infoc_1.infot = 1; - sgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; - sgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - sgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - sgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - sgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - sgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - sgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - sgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - sgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - sgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - sgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - sgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - sgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - sgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - sgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - sgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - sgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - sgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - sgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, + sgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - sgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - sgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - sgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, + sgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - sgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - sgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - sgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + sgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - sgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + sgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - sgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - sgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + sgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); @@ -3928,9 +3938,9 @@ static integer c__2 = 2; } /* schke_ */ -/* Subroutine */ int smake_(char *type__, char *uplo, char *diag, integer *m, +/* Subroutine */ int smake_(char *type__, char *uplo, char *diag, integer *m, integer *n, real *a, integer *nmax, real *aa, integer *lda, logical * - reset, real *transl, ftnlen type_len, ftnlen uplo_len, ftnlen + reset, real *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ @@ -4075,7 +4085,7 @@ static integer c__2 = 2; /* Subroutine */ int smmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, real *alpha, real *a, integer *lda, real *b, integer * ldb, real *beta, real *c__, integer *ldc, real *ct, real *g, real *cc, - integer *ldcc, real *eps, real *err, logical *fatal, integer *nout, + integer *ldcc, real *eps, real *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ @@ -4087,7 +4097,7 @@ static integer c__2 = 2; " \002,i3)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3; real r__1, r__2; @@ -4141,9 +4151,9 @@ static integer c__2 = 2; cc -= cc_offset; /* Function Body */ - trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == + trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; - tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == + tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ @@ -4165,7 +4175,7 @@ static integer c__2 = 2; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[k + j * b_dim1]; - g[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 + g[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = b[k + j * b_dim1], abs(r__2)); /* L20: */ } @@ -4177,7 +4187,7 @@ static integer c__2 = 2; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[k + j * b_dim1]; - g[i__] += (r__1 = a[k + i__ * a_dim1], abs(r__1)) * (r__2 + g[i__] += (r__1 = a[k + i__ * a_dim1], abs(r__1)) * (r__2 = b[k + j * b_dim1], abs(r__2)); /* L40: */ } @@ -4189,7 +4199,7 @@ static integer c__2 = 2; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[j + k * b_dim1]; - g[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 + g[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = b[j + k * b_dim1], abs(r__2)); /* L60: */ } @@ -4201,7 +4211,7 @@ static integer c__2 = 2; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[j + k * b_dim1]; - g[i__] += (r__1 = a[k + i__ * a_dim1], abs(r__1)) * (r__2 + g[i__] += (r__1 = a[k + i__ * a_dim1], abs(r__1)) * (r__2 = b[j + k * b_dim1], abs(r__2)); /* L80: */ } @@ -4328,7 +4338,7 @@ logical lse_(real *ri, real *rj, integer *lr) } /* lse_ */ -logical lseres_(char *type__, char *uplo, integer *m, integer *n, real *aa, +logical lseres_(char *type__, char *uplo, integer *m, integer *n, real *aa, real *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ @@ -4495,7 +4505,7 @@ real sdiff_(real *x, real *y) } /* sdiff_ */ -/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, +/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ diff --git a/blastest/src/zblat1.c b/blastest/src/zblat1.c index b620910beb..93a24f4c31 100644 --- a/blastest/src/zblat1.c +++ b/blastest/src/zblat1.c @@ -68,6 +68,11 @@ static doublereal c_b52 = 0.; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "zblat1"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ static doublereal sfac = 9.765625e-4; @@ -84,7 +89,7 @@ static doublereal c_b52 = 0.; /* Local variables */ integer ic; - extern /* Subroutine */ int check1_(doublereal *), check2_(doublereal *), + extern /* Subroutine */ int check1_(doublereal *), check2_(doublereal *), header_(void); /* Fortran I/O blocks */ @@ -136,7 +141,12 @@ static doublereal c_b52 = 0.; } s_stop("", (ftnlen)0); - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ /* Subroutine */ int header_(void) @@ -222,7 +232,7 @@ static doublereal c_b52 = 0.; doublecomplex z__1; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -230,14 +240,14 @@ static doublereal c_b52 = 0.; integer i__; doublecomplex cx[8]; integer np1, len; - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *), ctest_(integer *, doublecomplex *, + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), ctest_(integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *); doublecomplex mwpcs[5], mwpct[5]; extern /* Subroutine */ int itest1_(integer *, integer *); extern doublereal dznrm2_(integer *, doublecomplex *, integer *); - extern /* Subroutine */ int stest1_(doublereal *, doublereal *, - doublereal *, doublereal *), zdscal_(integer *, doublereal *, + extern /* Subroutine */ int stest1_(doublereal *, doublereal *, + doublereal *, doublereal *), zdscal_(integer *, doublereal *, doublecomplex *, integer *); extern integer izamax_(integer *, doublecomplex *, integer *); extern doublereal dzasum_(integer *, doublecomplex *, integer *); @@ -433,7 +443,7 @@ static doublereal c_b52 = 0.; 0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.9,.5},{-.4,-.7},{0.,0.} ,{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.9,.5},{-.4,-.7},{.1,-.5},{ -.1,-.9},{-.5,-.3},{.2,-.8} }; - static doublecomplex csize1[4] = { {0.,0.},{.9,.9},{1.63,1.73},{2.9,2.78} + static doublecomplex csize1[4] = { {0.,0.},{.9,.9},{1.63,1.73},{2.9,2.78} }; static doublecomplex csize3[14] = { {0.,0.},{0.,0.},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{1.17,1.17},{1.17,1.17},{1.17,1.17},{1.17, @@ -447,7 +457,7 @@ static doublereal c_b52 = 0.; doublecomplex z__1; /* Builtin functions */ - integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); @@ -457,7 +467,7 @@ static doublereal c_b52 = 0.; integer mx, my; doublecomplex cdot[1]; integer lenx, leny; - extern /* Subroutine */ int ctest_(integer *, doublecomplex *, + extern /* Subroutine */ int ctest_(integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *); extern /* Double Complex */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL @@ -465,10 +475,10 @@ static doublereal c_b52 = 0.; #else doublecomplex zdotc_( #endif - integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); integer ksize; - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); extern /* Double Complex */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL @@ -476,10 +486,10 @@ doublecomplex zdotc_( #else doublecomplex zdotu_( #endif - integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); - extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *); /* Fortran I/O blocks */ @@ -669,11 +679,11 @@ doublecomplex zdotu_( } /* stest_ */ -/* Subroutine */ int stest1_(doublereal *scomp1, doublereal *strue1, +/* Subroutine */ int stest1_(doublereal *scomp1, doublereal *strue1, doublereal *ssize, doublereal *sfac) { doublereal scomp[1], strue[1]; - extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, + extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* ************************* STEST1 ***************************** */ @@ -715,7 +725,7 @@ doublereal sdiff_(doublereal *sa, doublereal *sb) return ret_val; } /* sdiff_ */ -/* Subroutine */ int ctest_(integer *len, doublecomplex *ccomp, doublecomplex +/* Subroutine */ int ctest_(integer *len, doublecomplex *ccomp, doublecomplex *ctrue, doublecomplex *csize, doublereal *sfac) { /* System generated locals */ @@ -727,7 +737,7 @@ doublereal sdiff_(doublereal *sa, doublereal *sb) /* Local variables */ integer i__; doublereal scomp[20], ssize[20], strue[20]; - extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, + extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* **************************** CTEST ***************************** */ diff --git a/blastest/src/zblat2.c b/blastest/src/zblat2.c index 030f03b833..5550b413f6 100644 --- a/blastest/src/zblat2.c +++ b/blastest/src/zblat2.c @@ -157,10 +157,15 @@ static logical c_false = FALSE_; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "zblat2"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ - static char snames[6*17] = "ZGEMV " "ZGBMV " "ZHEMV " "ZHBMV " "ZHPMV " - "ZTRMV " "ZTBMV " "ZTPMV " "ZTRSV " "ZTBSV " "ZTPSV " "ZGERC " + static char snames[6*17] = "ZGEMV " "ZGBMV " "ZHEMV " "ZHBMV " "ZHPMV " + "ZTRMV " "ZTBMV " "ZTPMV " "ZTRSV " "ZTBSV " "ZTPSV " "ZGERC " "ZGERU " "ZHER " "ZHPR " "ZHER2 " "ZHPR2 "; /* Format strings */ @@ -208,10 +213,10 @@ static logical c_false = FALSE_; cllist cl__1; /* Builtin functions */ - integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, - char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), - s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, + char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), + s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); @@ -234,53 +239,53 @@ static logical c_false = FALSE_; integer ninc, nbet, ntra; logical rewi; integer nout; - extern /* Subroutine */ int zchk1_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, - integer *, integer *, integer *, integer *, doublecomplex *, - integer *, doublecomplex *, integer *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, doublecomplex *, + extern /* Subroutine */ int zchk1_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, + integer *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, - ftnlen), zchk2_(char *, doublereal *, doublereal *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, integer *, integer *, integer *, + ftnlen), zchk2_(char *, doublereal *, doublereal *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * - , doublecomplex *, doublecomplex *, doublecomplex *, - doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, - ftnlen), zchk3_(char *, doublereal *, doublereal *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *, integer *, + , doublecomplex *, doublecomplex *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, + ftnlen), zchk3_(char *, doublereal *, doublereal *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, - doublecomplex *, ftnlen), zchk4_(char *, doublereal *, - doublereal *, integer *, integer *, logical *, logical *, logical - *, integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, ftnlen), zchk4_(char *, doublereal *, + doublereal *, integer *, integer *, logical *, logical *, logical + *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, - doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex - *, doublecomplex *, doublecomplex *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex + *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk5_( - char *, doublereal *, doublereal *, integer *, integer *, logical - *, logical *, logical *, integer *, integer *, integer *, - doublecomplex *, integer *, integer *, integer *, integer *, + char *, doublereal *, doublereal *, integer *, integer *, logical + *, logical *, logical *, integer *, integer *, integer *, + doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * - , doublecomplex *, doublecomplex *, doublecomplex *, - doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, - doublecomplex *, ftnlen), zchk6_(char *, doublereal *, doublereal - *, integer *, integer *, logical *, logical *, logical *, integer - *, integer *, integer *, doublecomplex *, integer *, integer *, - integer *, integer *, doublecomplex *, doublecomplex *, + , doublecomplex *, doublecomplex *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, + doublecomplex *, ftnlen), zchk6_(char *, doublereal *, doublereal + *, integer *, integer *, logical *, logical *, logical *, integer + *, integer *, integer *, doublecomplex *, integer *, integer *, + integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * - , doublecomplex *, doublecomplex *, doublecomplex *, + , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int zchke_(integer *, char *, integer *, ftnlen); char snaps[32], trans[1]; - extern /* Subroutine */ int zmvch_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, doublereal *, doublecomplex *, doublereal *, + extern /* Subroutine */ int zmvch_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer isnum; logical ltest[17], sfatal; @@ -630,7 +635,7 @@ static logical c_false = FALSE_; goto L80; } for (i__ = 1; i__ <= 17; ++i__) { - if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) + if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } @@ -689,7 +694,7 @@ static logical c_false = FALSE_; /* YY holds the exact result. On exit from ZMVCH YT holds */ /* the result computed by ZMVCH. */ *(unsigned char *)trans = 'N'; - zmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c__1, &c_b1, y, &c__1, yt, g, + zmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c__1, &c_b1, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lze_(yy, yt, &n); if (! same || err != 0.) { @@ -702,7 +707,7 @@ static logical c_false = FALSE_; s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; - zmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c_n1, &c_b1, y, &c_n1, yt, g, + zmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c_n1, &c_b1, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lze_(yy, yt, &n); if (! same || err != 0.) { @@ -763,44 +768,44 @@ static logical c_false = FALSE_; /* Test ZGEMV, 01, and ZGBMV, 02. */ L140: zchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, - &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, + &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test ZHEMV, 03, ZHBMV, 04, and ZHPMV, 05. */ L150: zchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, - &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, + &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test ZTRMV, 06, ZTBMV, 07, ZTPMV, 08, */ /* ZTRSV, 09, ZTBSV, 10, and ZTPSV, 11. */ L160: zchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, + trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test ZGERC, 12, ZGERU, 13. */ L170: zchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test ZHER, 14, and ZHPR, 15. */ L180: zchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test ZHER2, 16, and ZHPR2, 17. */ L190: zchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, - inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, + inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: @@ -842,16 +847,21 @@ static logical c_false = FALSE_; /* End of ZBLAT2. */ - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ -/* Subroutine */ int zchk1_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, - integer *nalf, doublecomplex *alf, integer *nbet, doublecomplex *bet, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex - *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, + fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, + integer *nalf, doublecomplex *alf, integer *nbet, doublecomplex *bet, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex + *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, ftnlen sname_len) { @@ -881,7 +891,7 @@ static logical c_false = FALSE_; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; @@ -904,7 +914,7 @@ static logical c_false = FALSE_; logical full, tran, null; doublecomplex alpha; logical isame[13]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); @@ -912,22 +922,22 @@ static logical c_false = FALSE_; logical reset; integer incxs, incys; extern /* Subroutine */ int zgbmv_(char *, integer *, integer *, integer * - , integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, + , integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); char trans[1]; - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), - zmvch_(char *, integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - doublereal *, doublecomplex *, doublereal *, doublereal *, + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + zmvch_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical banded; doublereal errmax; doublecomplex transl; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1]; @@ -1108,9 +1118,9 @@ static logical c_false = FALSE_; transl.r = 0., transl.i = 0.; i__7 = abs(incy); i__8 = ml - 1; - zmake_("GE", " ", " ", &c__1, &ml, &y[1], + zmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & - i__8, &reset, &transl, (ftnlen)2, + i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; @@ -1118,7 +1128,7 @@ static logical c_false = FALSE_; /* Save every datum before calling the */ /* subroutine. */ - *(unsigned char *)transs = *(unsigned + *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; @@ -1129,7 +1139,7 @@ static logical c_false = FALSE_; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; - as[i__8].r = aa[i__9].r, as[i__8].i = + as[i__8].r = aa[i__9].r, as[i__8].i = aa[i__9].i; /* L10: */ } @@ -1138,7 +1148,7 @@ static logical c_false = FALSE_; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; - xs[i__8].r = xx[i__9].r, xs[i__8].i = + xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[i__9].i; /* L20: */ } @@ -1148,7 +1158,7 @@ static logical c_false = FALSE_; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; - ys[i__8].r = yy[i__9].r, ys[i__8].i = + ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[i__9].i; /* L30: */ } @@ -1187,7 +1197,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - zgemv_(trans, &m, &n, &alpha, &aa[1], + zgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { @@ -1248,7 +1258,7 @@ static logical c_false = FALSE_; isame[1] = ms == m; isame[2] = ns == n; if (full) { - isame[3] = als.r == alpha.r && als.i + isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lze_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; @@ -1270,13 +1280,13 @@ static logical c_false = FALSE_; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; - isame[5] = als.r == alpha.r && als.i + isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lze_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lze_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; - isame[10] = bls.r == beta.r && bls.i + isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lze_(&ys[1], &yy[1], & @@ -1318,8 +1328,8 @@ static logical c_false = FALSE_; zmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, - &beta, &y[1], &incy, &yt[1], - &g[1], &yy[1], eps, &err, + &beta, &y[1], &incy, &yt[1], + &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); @@ -1423,13 +1433,13 @@ static logical c_false = FALSE_; } /* zchk1_ */ -/* Subroutine */ int zchk2_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, - integer *nalf, doublecomplex *alf, integer *nbet, doublecomplex *bet, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex - *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, + fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, + integer *nalf, doublecomplex *alf, integer *nbet, doublecomplex *bet, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex + *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, ftnlen sname_len) { @@ -1463,7 +1473,7 @@ static logical c_false = FALSE_; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; @@ -1472,7 +1482,7 @@ static logical c_false = FALSE_; f_rew(alist *); /* Local variables */ - integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, + integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; doublecomplex als, bls; doublereal err; @@ -1485,31 +1495,31 @@ static logical c_false = FALSE_; char uplo[1]; doublecomplex alpha; logical isame[13]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs, incys; - extern /* Subroutine */ int zhbmv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), - zmvch_(char *, integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - doublereal *, doublecomplex *, doublereal *, doublereal *, + extern /* Subroutine */ int zhbmv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + zmvch_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen), zhemv_(char *, integer * - , doublecomplex *, doublecomplex *, integer *, doublecomplex *, + , doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); char uplos[1]; - extern /* Subroutine */ int zhpmv_(char *, integer *, doublecomplex *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, + extern /* Subroutine */ int zhpmv_(char *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); logical banded, packed; doublereal errmax; doublecomplex transl; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -1672,7 +1682,7 @@ static logical c_false = FALSE_; i__8 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & - reset, &transl, (ftnlen)2, (ftnlen)1, + reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; @@ -1824,13 +1834,13 @@ static logical c_false = FALSE_; unsigned char *)uplos; isame[1] = ns == n; if (full) { - isame[2] = als.r == alpha.r && als.i == + isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lze_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; - isame[7] = bls.r == beta.r && bls.i == + isame[7] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[8] = lze_(&ys[1], &yy[1], &ly); @@ -1843,13 +1853,13 @@ static logical c_false = FALSE_; isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; - isame[3] = als.r == alpha.r && als.i == + isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lze_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lze_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; - isame[8] = bls.r == beta.r && bls.i == + isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lze_(&ys[1], &yy[1], &ly); @@ -1861,12 +1871,12 @@ static logical c_false = FALSE_; } isame[10] = incys == incy; } else if (packed) { - isame[2] = als.r == alpha.r && als.i == + isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&as[1], &aa[1], &laa); isame[4] = lze_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; - isame[6] = bls.r == beta.r && bls.i == + isame[6] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[7] = lze_(&ys[1], &yy[1], &ly); @@ -1904,8 +1914,8 @@ static logical c_false = FALSE_; /* Check the result. */ - zmvch_("N", &n, &n, &alpha, &a[a_offset], - nmax, &x[1], &incx, &beta, &y[1], + zmvch_("N", &n, &n, &alpha, &a[a_offset], + nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); @@ -2015,12 +2025,12 @@ static logical c_false = FALSE_; } /* zchk2_ */ -/* Subroutine */ int zchk3_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * - fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, - integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex - *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *xt, + fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, + integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex + *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *xt, doublereal *g, doublecomplex *z__, ftnlen sname_len) { /* Initialized data */ @@ -2060,7 +2070,7 @@ static logical c_false = FALSE_; integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ - integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda, ict, + integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda, ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); @@ -2071,7 +2081,7 @@ static logical c_false = FALSE_; logical full, null; char uplo[1], diags[1]; logical isame[13]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); @@ -2079,28 +2089,28 @@ static logical c_false = FALSE_; logical reset; integer incxs; char trans[1]; - extern /* Subroutine */ int zmvch_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, doublereal *, doublecomplex *, doublereal *, + extern /* Subroutine */ int zmvch_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); char uplos[1]; - extern /* Subroutine */ int ztbmv_(char *, char *, char *, integer *, + extern /* Subroutine */ int ztbmv_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztbsv_(char *, char *, char *, integer * - , integer *, doublecomplex *, integer *, doublecomplex *, integer - *, ftnlen, ftnlen, ftnlen), ztpmv_(char *, char *, char *, - integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, - ftnlen, ftnlen), ztrmv_(char *, char *, char *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, - ftnlen, ftnlen), ztpsv_(char *, char *, char *, integer *, - doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, + , integer *, doublecomplex *, integer *, doublecomplex *, integer + *, ftnlen, ftnlen, ftnlen), ztpmv_(char *, char *, char *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, + ftnlen, ftnlen), ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, + ftnlen, ftnlen), ztpsv_(char *, char *, char *, integer *, + doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrsv_(char *, char *, char *, integer *, doublecomplex * , integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; doublereal errmax; doublecomplex transl; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1]; @@ -2226,13 +2236,13 @@ static logical c_false = FALSE_; ; for (icd = 1; icd <= 2; ++icd) { - *(unsigned char *)diag = *(unsigned char *)&ichd[icd + *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; - zmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], + zmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -2287,7 +2297,7 @@ static logical c_false = FALSE_; /* Call the subroutine. */ - if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) + if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { @@ -2340,7 +2350,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - ztbmv_(uplo, trans, diag, &n, &k, &aa[1], + ztbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { @@ -2421,7 +2431,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - ztbsv_(uplo, trans, diag, &n, &k, &aa[1], + ztbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { @@ -2463,11 +2473,11 @@ static logical c_false = FALSE_; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplo == *(unsigned + isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; - isame[1] = *(unsigned char *)trans == *(unsigned + isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; - isame[2] = *(unsigned char *)diag == *(unsigned + isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { @@ -2537,7 +2547,7 @@ static logical c_false = FALSE_; zmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &x[1], &incx, &c_b1, &z__[ - 1], &incx, &xt[1], &g[1], &xx[1], + 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( @@ -2549,18 +2559,18 @@ static logical c_false = FALSE_; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = (i__ - 1) * abs(incx) + 1; - z__[i__5].r = xx[i__6].r, z__[i__5].i + z__[i__5].r = xx[i__6].r, z__[i__5].i = xx[i__6].i; i__5 = (i__ - 1) * abs(incx) + 1; i__6 = i__; - xx[i__5].r = x[i__6].r, xx[i__5].i = + xx[i__5].r = x[i__6].r, xx[i__5].i = x[i__6].i; /* L50: */ } zmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &z__[1], &incx, &c_b1, &x[ - 1], &incx, &xt[1], &g[1], &xx[1], - eps, &err, fatal, nout, &c_false, + 1], &incx, &xt[1], &g[1], &xx[1], + eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); @@ -2662,12 +2672,12 @@ static logical c_false = FALSE_; } /* zchk3_ */ -/* Subroutine */ int zchk4_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * - alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex - *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, + alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex + *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { @@ -2713,26 +2723,26 @@ static logical c_false = FALSE_; logical null; doublecomplex alpha; logical isame[13]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); logical reset; integer incxs, incys; - extern /* Subroutine */ int zmvch_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, doublereal *, doublecomplex *, doublereal *, + extern /* Subroutine */ int zmvch_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen), zgeru_( integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); doublereal errmax; doublecomplex transl; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -2834,7 +2844,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = m - 1; zmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { i__3 = m / 2; @@ -2873,7 +2883,7 @@ static logical c_false = FALSE_; transl.r = 0., transl.i = 0.; i__5 = m - 1; i__6 = n - 1; - zmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], + zmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -3032,9 +3042,9 @@ static logical c_false = FALSE_; d_cnjg(&z__1, w); w[0].r = z__1.r, w[0].i = z__1.i; } - zmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, + zmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b2, &a[j * a_dim1 + 1], & - c__1, &yt[1], &g[1], &aa[(j - 1) * + c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); @@ -3114,12 +3124,12 @@ static logical c_false = FALSE_; } /* zchk4_ */ -/* Subroutine */ int zchk5_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * - alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex - *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, + alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex + *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { @@ -3169,32 +3179,32 @@ static logical c_false = FALSE_; doublereal rals; integer incx; logical full; - extern /* Subroutine */ int zher_(char *, integer *, doublereal *, + extern /* Subroutine */ int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen); logical null; char uplo[1]; - extern /* Subroutine */ int zhpr_(char *, integer *, doublereal *, + extern /* Subroutine */ int zhpr_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; logical isame[13]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs; - extern /* Subroutine */ int zmvch_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, doublereal *, doublecomplex *, doublereal *, + extern /* Subroutine */ int zmvch_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical upper; char uplos[1]; logical packed; doublereal ralpha, errmax; doublecomplex transl; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -3297,7 +3307,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; @@ -3372,7 +3382,7 @@ static logical c_false = FALSE_; al__1.aunit = *ntra; f_rew(&al__1); } - zher_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], &lda, + zher_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { @@ -3482,9 +3492,9 @@ static logical c_false = FALSE_; jj = j; lj = n - j + 1; } - zmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, - &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, - &yt[1], &g[1], &aa[ja], eps, &err, fatal, + zmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, + &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, + &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { @@ -3582,12 +3592,12 @@ static logical c_false = FALSE_; } /* zchk5_ */ -/* Subroutine */ int zchk6_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk6_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * - alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, - doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex - *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, + alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, + doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex + *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { @@ -3617,7 +3627,7 @@ static logical c_false = FALSE_; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1, z__2, z__3; alist al__1; @@ -3639,31 +3649,31 @@ static logical c_false = FALSE_; integer incx, incy; logical full, null; char uplo[1]; - extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, ftnlen), zhpr2_(char *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, + extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen), zhpr2_(char *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; logical isame[13]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs, incys; - extern /* Subroutine */ int zmvch_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, doublereal *, doublecomplex *, doublereal *, + extern /* Subroutine */ int zmvch_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical upper; char uplos[1]; logical packed; doublereal errmax; doublecomplex transl; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -3768,7 +3778,7 @@ static logical c_false = FALSE_; i__3 = abs(incx); i__4 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, - &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, + &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; @@ -3808,7 +3818,7 @@ static logical c_false = FALSE_; transl.r = 0., transl.i = 0.; i__5 = n - 1; i__6 = n - 1; - zmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], + zmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -3996,14 +4006,14 @@ static logical c_false = FALSE_; i__5 = n; for (j = 1; j <= i__5; ++j) { d_cnjg(&z__2, &z__[j + (z_dim1 << 1)]); - z__1.r = alpha.r * z__2.r - alpha.i * z__2.i, - z__1.i = alpha.r * z__2.i + alpha.i * + z__1.r = alpha.r * z__2.r - alpha.i * z__2.i, + z__1.i = alpha.r * z__2.i + alpha.i * z__2.r; w[0].r = z__1.r, w[0].i = z__1.i; d_cnjg(&z__2, &alpha); d_cnjg(&z__3, &z__[j + z_dim1]); - z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, - z__1.i = z__2.r * z__3.i + z__2.i * + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; w[1].r = z__1.r, w[1].i = z__1.i; if (upper) { @@ -4013,8 +4023,8 @@ static logical c_false = FALSE_; jj = j; lj = n - j + 1; } - zmvch_("N", &lj, &c__2, &c_b2, &z__[jj + - z_dim1], nmax, w, &c__1, &c_b2, &a[jj + zmvch_("N", &lj, &c__2, &c_b2, &z__[jj + + z_dim1], nmax, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], & aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); @@ -4119,7 +4129,7 @@ static logical c_false = FALSE_; } /* zchk6_ */ -/* Subroutine */ int zchke_(integer *isnum, char *srnamt, integer *nout, +/* Subroutine */ int zchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ @@ -4133,47 +4143,47 @@ static logical c_false = FALSE_; /* Local variables */ doublecomplex a[1] /* was [1][1] */, x[1], y[1], beta; - extern /* Subroutine */ int zher_(char *, integer *, doublereal *, - doublecomplex *, integer *, doublecomplex *, integer *, ftnlen), + extern /* Subroutine */ int zher_(char *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen), zhpr_(char *, integer *, doublereal *, doublecomplex *, integer *, - doublecomplex *, ftnlen), zher2_(char *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, integer *, ftnlen), zhpr2_(char *, - integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, ftnlen), zher2_(char *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, ftnlen), zhpr2_(char *, + integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; - extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zgbmv_(char *, integer *, integer *, + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zgbmv_(char *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, ftnlen), zhbmv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), - zgemv_(char *, integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, integer *, ftnlen), zhemv_(char - *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, ftnlen), zgeru_(integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), ztbmv_(char *, char *, char *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen), zhbmv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + zgemv_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, ftnlen), zhemv_(char + *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen), zgeru_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), ztbmv_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, - integer *, ftnlen, ftnlen, ftnlen), zhpmv_(char *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, integer *, ftnlen), ztbsv_(char - *, char *, char *, integer *, integer *, doublecomplex *, integer + integer *, ftnlen, ftnlen, ftnlen), zhpmv_(char *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, ftnlen), ztbsv_(char + *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpmv_( - char *, char *, char *, integer *, doublecomplex *, doublecomplex - *, integer *, ftnlen, ftnlen, ftnlen), ztrmv_(char *, char *, - char *, integer *, doublecomplex *, integer *, doublecomplex *, + char *, char *, char *, integer *, doublecomplex *, doublecomplex + *, integer *, ftnlen, ftnlen, ftnlen), ztrmv_(char *, char *, + char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpsv_(char *, char *, char *, - integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, - ftnlen, ftnlen), ztrsv_(char *, char *, char *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, + ftnlen, ftnlen), ztrsv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen); doublereal ralpha; - extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical + extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ @@ -4702,9 +4712,9 @@ static logical c_false = FALSE_; } /* zchke_ */ -/* Subroutine */ int zmake_(char *type__, char *uplo, char *diag, integer *m, - integer *n, doublecomplex *a, integer *nmax, doublecomplex *aa, - integer *lda, integer *kl, integer *ku, logical *reset, doublecomplex +/* Subroutine */ int zmake_(char *type__, char *uplo, char *diag, integer *m, + integer *n, doublecomplex *a, integer *nmax, doublecomplex *aa, + integer *lda, integer *kl, integer *ku, logical *reset, doublecomplex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ @@ -4765,7 +4775,7 @@ static logical c_false = FALSE_; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { - if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) + if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { i__3 = i__ + j * a_dim1; zbeg_(&z__2, reset); @@ -4998,11 +5008,11 @@ static logical c_false = FALSE_; } /* zmake_ */ -/* Subroutine */ int zmvch_(char *trans, integer *m, integer *n, +/* Subroutine */ int zmvch_(char *trans, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *nmax, doublecomplex * x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * - incy, doublecomplex *yt, doublereal *g, doublecomplex *yy, doublereal - *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, + incy, doublecomplex *yt, doublereal *g, doublecomplex *yy, doublereal + *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ @@ -5105,15 +5115,15 @@ static logical c_false = FALSE_; i__4 = iy; i__5 = j + i__ * a_dim1; i__6 = jx; - z__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, + z__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, z__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6] .r; z__1.r = yt[i__4].r + z__2.r, z__1.i = yt[i__4].i + z__2.i; yt[i__3].r = z__1.r, yt[i__3].i = z__1.i; i__3 = j + i__ * a_dim1; i__4 = jx; - g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j - + i__ * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, + g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + + i__ * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4))); jx += incxl; /* L10: */ @@ -5125,14 +5135,14 @@ static logical c_false = FALSE_; i__4 = iy; d_cnjg(&z__3, &a[j + i__ * a_dim1]); i__5 = jx; - z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, z__2.i = + z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, z__2.i = z__3.r * x[i__5].i + z__3.i * x[i__5].r; z__1.r = yt[i__4].r + z__2.r, z__1.i = yt[i__4].i + z__2.i; yt[i__3].r = z__1.r, yt[i__3].i = z__1.i; i__3 = j + i__ * a_dim1; i__4 = jx; - g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j - + i__ * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, + g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + + i__ * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4))); jx += incxl; /* L20: */ @@ -5144,7 +5154,7 @@ static logical c_false = FALSE_; i__4 = iy; i__5 = i__ + j * a_dim1; i__6 = jx; - z__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, + z__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, z__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6] .r; z__1.r = yt[i__4].r + z__2.r, z__1.i = yt[i__4].i + z__2.i; @@ -5152,7 +5162,7 @@ static logical c_false = FALSE_; i__3 = i__ + j * a_dim1; i__4 = jx; g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[ - i__ + j * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, + i__ + j * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4))); jx += incxl; /* L30: */ @@ -5160,7 +5170,7 @@ static logical c_false = FALSE_; } i__2 = iy; i__3 = iy; - z__2.r = alpha->r * yt[i__3].r - alpha->i * yt[i__3].i, z__2.i = + z__2.r = alpha->r * yt[i__3].r - alpha->i * yt[i__3].i, z__2.i = alpha->r * yt[i__3].i + alpha->i * yt[i__3].r; i__4 = iy; z__3.r = beta->r * y[i__4].r - beta->i * y[i__4].i, z__3.i = beta->r * @@ -5169,7 +5179,7 @@ static logical c_false = FALSE_; yt[i__2].r = z__1.r, yt[i__2].i = z__1.i; i__2 = iy; g[iy] = ((d__1 = alpha->r, abs(d__1)) + (d__2 = d_imag(alpha), abs( - d__2))) * g[iy] + ((d__3 = beta->r, abs(d__3)) + (d__4 = + d__2))) * g[iy] + ((d__3 = beta->r, abs(d__3)) + (d__4 = d_imag(beta), abs(d__4))) * ((d__5 = y[i__2].r, abs(d__5)) + ( d__6 = d_imag(&y[iy]), abs(d__6))); iy += incyl; @@ -5281,8 +5291,8 @@ logical lze_(doublecomplex *ri, doublecomplex *rj, integer *lr) } /* lze_ */ -logical lzeres_(char *type__, char *uplo, integer *m, integer *n, - doublecomplex *aa, doublecomplex *as, integer *lda, ftnlen type_len, +logical lzeres_(char *type__, char *uplo, integer *m, integer *n, + doublecomplex *aa, doublecomplex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ @@ -5459,7 +5469,7 @@ doublereal ddiff_(doublereal *x, doublereal *y) } /* ddiff_ */ -/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, +/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ diff --git a/blastest/src/zblat3.c b/blastest/src/zblat3.c index 3ff3634b68..045eeba420 100644 --- a/blastest/src/zblat3.c +++ b/blastest/src/zblat3.c @@ -140,9 +140,14 @@ static integer c_n1 = -1; /* ===================================================================== */ /* Main program */ int main(void) { +#ifdef BLIS_ENABLE_HPX + char* program = "zblat3"; + bli_thread_initialize_hpx( 1, &program ); +#endif + /* Initialized data */ - static char snames[6*9] = "ZGEMM " "ZHEMM " "ZSYMM " "ZTRMM " "ZTRSM " + static char snames[6*9] = "ZGEMM " "ZHEMM " "ZSYMM " "ZTRMM " "ZTRSM " "ZHERK " "ZSYRK " "ZHER2K" "ZSYR2K"; /* Format strings */ @@ -186,10 +191,10 @@ static integer c_n1 = -1; cllist cl__1; /* Builtin functions */ - integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), + integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, - char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), - s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, + char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), + s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); @@ -208,44 +213,44 @@ static integer c_n1 = -1; integer nbet, ntra; logical rewi; integer nout; - extern /* Subroutine */ int zchk1_(char *, doublereal *, doublereal *, - integer *, integer *, logical *, logical *, logical *, integer *, + extern /* Subroutine */ int zchk1_(char *, doublereal *, doublereal *, + integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, - ftnlen), zchk2_(char *, doublereal *, doublereal *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, + ftnlen), zchk2_(char *, doublereal *, doublereal *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex - *, doublecomplex *, doublecomplex *, doublecomplex *, - doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, - ftnlen), zchk3_(char *, doublereal *, doublereal *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex + *, doublecomplex *, doublecomplex *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, + ftnlen), zchk3_(char *, doublereal *, doublereal *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, - ftnlen), zchk4_(char *, doublereal *, doublereal *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, + ftnlen), zchk4_(char *, doublereal *, doublereal *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex - *, doublecomplex *, doublecomplex *, doublecomplex *, - doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, - ftnlen), zchk5_(char *, doublereal *, doublereal *, integer *, - integer *, logical *, logical *, logical *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex + *, doublecomplex *, doublecomplex *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, + ftnlen), zchk5_(char *, doublereal *, doublereal *, integer *, + integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex - *, doublecomplex *, doublecomplex *, doublecomplex *, - doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex + *, doublecomplex *, doublecomplex *, doublecomplex *, + doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen); logical fatal, trace; integer nidim; - extern /* Subroutine */ int zchke_(integer *, char *, integer *, ftnlen), - zmmch_(char *, char *, integer *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, doublereal *, doublecomplex *, integer *, - doublereal *, doublereal *, logical *, integer *, logical *, + extern /* Subroutine */ int zchke_(integer *, char *, integer *, ftnlen), + zmmch_(char *, char *, integer *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, doublereal *, doublecomplex *, integer *, + doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); char snaps[32]; integer isnum; @@ -517,7 +522,7 @@ static integer c_n1 = -1; goto L60; } for (i__ = 1; i__ <= 9; ++i__) { - if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) + if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } @@ -580,7 +585,7 @@ static integer c_n1 = -1; *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & - c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, + c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { @@ -595,7 +600,7 @@ static integer c_n1 = -1; } *(unsigned char *)transb = 'C'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & - c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, + c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { @@ -628,7 +633,7 @@ static integer c_n1 = -1; *(unsigned char *)transa = 'C'; *(unsigned char *)transb = 'N'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & - c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, + c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { @@ -643,7 +648,7 @@ static integer c_n1 = -1; } *(unsigned char *)transb = 'C'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & - c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, + c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { @@ -697,34 +702,34 @@ static integer c_n1 = -1; /* Test ZGEMM, 01. */ L140: zchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZHEMM, 02, ZSYMM, 03. */ L150: zchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZTRMM, 04, ZTRSM, 05. */ L160: zchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test ZHERK, 06, ZSYRK, 07. */ L170: zchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, - bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZHER2K, 08, ZSYR2K, 09. */ L180: zchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & - trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, + trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; @@ -768,15 +773,20 @@ static integer c_n1 = -1; /* End of ZBLAT3. */ - return 0; +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else + // Return peacefully. + return 0; +#endif } /* main */ -/* Subroutine */ int zchk1_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * - a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, - doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, + a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, + doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { @@ -802,7 +812,7 @@ static integer c_n1 = -1; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; @@ -811,7 +821,7 @@ static integer c_n1 = -1; f_rew(alist *); /* Local variables */ - integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, + integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; doublecomplex als, bls; doublereal err; @@ -821,23 +831,23 @@ static integer c_n1 = -1; logical same, null; doublecomplex alpha; logical isame[13], trana, tranb; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *, doublereal *, doublecomplex *, - integer *, doublereal *, doublereal *, logical *, integer *, - logical *, ftnlen, ftnlen), zgemm_(char *, char *, integer *, + extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, doublereal *, doublecomplex *, + integer *, doublereal *, doublereal *, logical *, integer *, + logical *, ftnlen, ftnlen), zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); logical reset; char tranas[1], tranbs[1], transa[1], transb[1]; doublereal errmax; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -928,7 +938,7 @@ static integer c_n1 = -1; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; - trana = *(unsigned char *)transa == 'T' || *(unsigned + trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { @@ -956,9 +966,9 @@ static integer c_n1 = -1; ftnlen)1); for (icb = 1; icb <= 3; ++icb) { - *(unsigned char *)transb = *(unsigned char *)&ich[icb + *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; - tranb = *(unsigned char *)transb == 'T' || *(unsigned + tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { @@ -1099,13 +1109,13 @@ static integer c_n1 = -1; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; - isame[5] = als.r == alpha.r && als.i == + isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lze_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lze_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; - isame[10] = bls.r == beta.r && bls.i == + isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lze_(&cs[1], &cc[1], &lcc); @@ -1143,9 +1153,9 @@ static integer c_n1 = -1; zmmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], - nmax, &beta, &c__[c_offset], + nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, - eps, &err, fatal, nout, &c_true, + eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ @@ -1226,12 +1236,12 @@ static integer c_n1 = -1; } /* zchk1_ */ -/* Subroutine */ int zchk2_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * - a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, - doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, + a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, + doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { @@ -1258,7 +1268,7 @@ static integer c_n1 = -1; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; alist al__1; @@ -1267,7 +1277,7 @@ static integer c_n1 = -1; integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ - integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, + integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc, ics; doublecomplex als, bls; integer icu; @@ -1282,27 +1292,27 @@ static integer c_n1 = -1; doublecomplex alpha; logical isame[13]; char sides[1]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *, doublereal *, doublecomplex *, - integer *, doublereal *, doublereal *, logical *, integer *, - logical *, ftnlen, ftnlen), zhemm_(char *, char *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, + extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, doublereal *, doublecomplex *, + integer *, doublereal *, doublereal *, logical *, integer *, + logical *, ftnlen, ftnlen), zhemm_(char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); logical reset; char uplos[1]; - extern /* Subroutine */ int zsymm_(char *, char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, + extern /* Subroutine */ int zsymm_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal errmax; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -1443,7 +1453,7 @@ static integer c_n1 = -1; /* Generate the matrix C. */ - zmake_("GE", " ", " ", &m, &n, &c__[c_offset], + zmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -1539,9 +1549,9 @@ static integer c_n1 = -1; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)sides == *(unsigned + isame[0] = *(unsigned char *)sides == *(unsigned char *)side; - isame[1] = *(unsigned char *)uplos == *(unsigned + isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; @@ -1586,14 +1596,14 @@ static integer c_n1 = -1; if (left) { zmmch_("N", "N", &m, &n, &m, &alpha, &a[ - a_offset], nmax, &b[b_offset], + a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { zmmch_("N", "N", &m, &n, &n, &alpha, &b[ - b_offset], nmax, &a[a_offset], + b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( @@ -1673,12 +1683,12 @@ static integer c_n1 = -1; } /* zchk2_ */ -/* Subroutine */ int zchk3_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * - alf, integer *nmax, doublecomplex *a, doublecomplex *aa, - doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex - *bs, doublecomplex *ct, doublereal *g, doublecomplex *c__, ftnlen + alf, integer *nmax, doublecomplex *a, doublecomplex *aa, + doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex + *bs, doublecomplex *ct, doublereal *g, doublecomplex *c__, ftnlen sname_len) { /* Initialized data */ @@ -1705,7 +1715,7 @@ static integer c_n1 = -1; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1; alist al__1; @@ -1731,27 +1741,27 @@ static integer c_n1 = -1; char diags[1]; logical isame[13]; char sides[1]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *, doublereal *, doublecomplex *, - integer *, doublereal *, doublereal *, logical *, integer *, + extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, doublereal *, doublecomplex *, + integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical reset; char uplos[1]; - extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), - ztrsm_(char *, char *, char *, char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char tranas[1], transa[1]; doublereal errmax; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ @@ -1888,7 +1898,7 @@ static integer c_n1 = -1; /* Generate the matrix B. */ - zmake_("GE", " ", " ", &m, &n, &b[b_offset], + zmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); @@ -1960,7 +1970,7 @@ static integer c_n1 = -1; } ztrmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, - (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { @@ -1993,7 +2003,7 @@ static integer c_n1 = -1; } ztrsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, - (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } @@ -2019,7 +2029,7 @@ static integer c_n1 = -1; unsigned char *)diag; isame[4] = ms == m; isame[5] = ns == n; - isame[6] = als.r == alpha.r && als.i == + isame[6] = als.r == alpha.r && als.i == alpha.i; isame[7] = lze_(&as[1], &aa[1], &laa); isame[8] = ldas == lda; @@ -2063,18 +2073,18 @@ static integer c_n1 = -1; zmmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & - c_b1, &c__[c_offset], + c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { zmmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & - c_b1, &c__[c_offset], + c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ - 1], &ldb, eps, &err, + 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } @@ -2087,14 +2097,14 @@ static integer c_n1 = -1; i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; - for (i__ = 1; i__ <= i__5; ++i__) + for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + j * c_dim1; i__7 = i__ + (j - 1) * ldb; c__[i__6].r = bb[i__7].r, c__[i__6].i = bb[i__7].i; i__6 = i__ + (j - 1) * ldb; i__7 = i__ + j * b_dim1; - z__1.r = alpha.r * b[i__7].r - alpha.i * b[i__7].i, + z__1.r = alpha.r * b[i__7].r - alpha.i * b[i__7].i, z__1.i = alpha.r * b[i__7].i + alpha.i * b[ i__7].r; bb[i__6].r = z__1.r, bb[i__6].i = z__1.i; @@ -2105,20 +2115,20 @@ static integer c_n1 = -1; if (left) { zmmch_(transa, "N", &m, &n, &m, & - c_b2, &a[a_offset], nmax, + c_b2, &a[a_offset], nmax, &c__[c_offset], nmax, & - c_b1, &b[b_offset], nmax, + c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & - ldb, eps, &err, fatal, + ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { zmmch_("N", transa, &m, &n, &n, & - c_b2, &c__[c_offset], - nmax, &a[a_offset], nmax, + c_b2, &c__[c_offset], + nmax, &a[a_offset], nmax, &c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & - ldb, eps, &err, fatal, + ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } @@ -2199,12 +2209,12 @@ static integer c_n1 = -1; } /* zchk3_ */ -/* Subroutine */ int zchk4_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * - a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, - doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, + a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, + doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { @@ -2236,7 +2246,7 @@ static integer c_n1 = -1; "ER:\002)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1; alist al__1; @@ -2262,29 +2272,29 @@ static integer c_n1 = -1; doublecomplex alpha; doublereal rbeta; logical isame[13]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *, doublereal *, doublecomplex *, - integer *, doublereal *, doublereal *, logical *, integer *, + extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, doublereal *, doublecomplex *, + integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); doublereal rbets; logical reset; - extern /* Subroutine */ int zherk_(char *, char *, integer *, integer *, - doublereal *, doublecomplex *, integer *, doublereal *, + extern /* Subroutine */ int zherk_(char *, char *, integer *, integer *, + doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); char trans[1]; logical upper; char uplos[1]; - extern /* Subroutine */ int zsyrk_(char *, char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, + extern /* Subroutine */ int zsyrk_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal ralpha, errmax; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1], transt[1]; @@ -2426,7 +2436,7 @@ static integer c_n1 = -1; } null = n <= 0; if (conj) { - null = null || (k <= 0 || ralpha == 0.) && + null = null || (k <= 0 || ralpha == 0.) && rbeta == 1.; } @@ -2505,7 +2515,7 @@ static integer c_n1 = -1; f_rew(&al__1); } zherk_(uplo, trans, &n, &k, &ralpha, &aa[1], & - lda, &rbeta, &cc[1], &ldc, (ftnlen)1, + lda, &rbeta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { @@ -2552,16 +2562,16 @@ static integer c_n1 = -1; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplos == *(unsigned + isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; - isame[1] = *(unsigned char *)transs == *(unsigned + isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; if (conj) { isame[4] = rals == ralpha; } else { - isame[4] = als.r == alpha.r && als.i == + isame[4] = als.r == alpha.r && als.i == alpha.i; } isame[5] = lze_(&as[1], &aa[1], &laa); @@ -2569,7 +2579,7 @@ static integer c_n1 = -1; if (conj) { isame[7] = rbets == rbeta; } else { - isame[7] = bets.r == beta.r && bets.i == + isame[7] = bets.r == beta.r && bets.i == beta.i; } if (null) { @@ -2623,19 +2633,19 @@ static integer c_n1 = -1; } if (tran) { zmmch_(transt, "N", &lj, &c__1, &k, & - alpha, &a[jj * a_dim1 + 1], - nmax, &a[j * a_dim1 + 1], - nmax, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + alpha, &a[jj * a_dim1 + 1], + nmax, &a[j * a_dim1 + 1], + nmax, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { zmmch_("N", transt, &lj, &c__1, &k, & - alpha, &a[jj + a_dim1], nmax, + alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & - ct[1], &g[1], &cc[jc], &ldc, + ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } @@ -2743,12 +2753,12 @@ static integer c_n1 = -1; } /* zchk4_ */ -/* Subroutine */ int zchk5_(char *sname, doublereal *eps, doublereal *thresh, +/* Subroutine */ int zchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * - ab, doublecomplex *aa, doublecomplex *as, doublecomplex *bb, - doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, + ab, doublecomplex *aa, doublecomplex *as, doublecomplex *bb, + doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal *g, doublecomplex *w, ftnlen sname_len) { @@ -2807,30 +2817,30 @@ static integer c_n1 = -1; doublecomplex alpha; doublereal rbeta; logical isame[13]; - extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, + extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; - extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *, doublereal *, doublecomplex *, - integer *, doublereal *, doublereal *, logical *, integer *, + extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, doublereal *, doublecomplex *, + integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); doublereal rbets; logical reset; char trans[1]; logical upper; char uplos[1]; - extern /* Subroutine */ int zher2k_(char *, char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublereal *, doublecomplex *, integer *, ftnlen, - ftnlen), zsyr2k_(char *, char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, + extern /* Subroutine */ int zher2k_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *, ftnlen, + ftnlen), zsyr2k_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal errmax; - extern logical lzeres_(char *, char *, integer *, integer *, + extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1], transt[1]; @@ -2986,7 +2996,7 @@ static integer c_n1 = -1; } null = n <= 0; if (conj) { - null = null || (k <= 0 || alpha.r == 0. && + null = null || (k <= 0 || alpha.r == 0. && alpha.i == 0.) && rbeta == 1.; } @@ -3121,9 +3131,9 @@ static integer c_n1 = -1; /* See what data changed inside subroutines. */ - isame[0] = *(unsigned char *)uplos == *(unsigned + isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; - isame[1] = *(unsigned char *)transs == *(unsigned + isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; @@ -3135,7 +3145,7 @@ static integer c_n1 = -1; if (conj) { isame[9] = rbets == rbeta; } else { - isame[9] = bets.r == beta.r && bets.i == + isame[9] = bets.r == beta.r && bets.i == beta.i; } if (null) { @@ -3191,20 +3201,20 @@ static integer c_n1 = -1; i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; - i__8 = (j - 1 << 1) * *nmax + k + + i__8 = (j - 1 << 1) * *nmax + k + i__; - z__1.r = alpha.r * ab[i__8].r - - alpha.i * ab[i__8].i, + z__1.r = alpha.r * ab[i__8].r - + alpha.i * ab[i__8].i, z__1.i = alpha.r * ab[ i__8].i + alpha.i * ab[ i__8].r; - w[i__7].r = z__1.r, w[i__7].i = + w[i__7].r = z__1.r, w[i__7].i = z__1.i; if (conj) { i__7 = k + i__; d_cnjg(&z__2, &alpha); i__8 = (j - 1 << 1) * *nmax + i__; - z__1.r = z__2.r * ab[i__8].r - z__2.i * ab[i__8].i, + z__1.r = z__2.r * ab[i__8].r - z__2.i * ab[i__8].i, z__1.i = z__2.r * ab[i__8].i + z__2.i * ab[ i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; @@ -3212,7 +3222,7 @@ static integer c_n1 = -1; i__7 = k + i__; i__8 = (j - 1 << 1) * *nmax + i__; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] - .i, z__1.i = alpha.r * ab[i__8].i + alpha.i + .i, z__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; } @@ -3223,9 +3233,9 @@ static integer c_n1 = -1; i__8 = *nmax << 1; zmmch_(transt, "N", &lj, &c__1, &i__6, &c_b2, &ab[jjab], &i__7, &w[ - 1], &i__8, &beta, &c__[jj + j + 1], &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1] - , &cc[jc], &ldc, eps, &err, + , &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { @@ -3234,14 +3244,14 @@ static integer c_n1 = -1; if (conj) { i__7 = i__; d_cnjg(&z__2, &ab[(k + i__ - 1) * *nmax + j]); - z__1.r = alpha.r * z__2.r - alpha.i * z__2.i, - z__1.i = alpha.r * z__2.i + alpha.i * + z__1.r = alpha.r * z__2.r - alpha.i * z__2.i, + z__1.i = alpha.r * z__2.i + alpha.i * z__2.r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; z__2.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] - .i, z__2.i = alpha.r * ab[i__8].i + alpha.i + .i, z__2.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; d_cnjg(&z__1, &z__2); w[i__7].r = z__1.r, w[i__7].i = z__1.i; @@ -3249,13 +3259,13 @@ static integer c_n1 = -1; i__7 = i__; i__8 = (k + i__ - 1) * *nmax + j; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] - .i, z__1.i = alpha.r * ab[i__8].i + alpha.i + .i, z__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] - .i, z__1.i = alpha.r * ab[i__8].i + alpha.i + .i, z__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; } @@ -3265,9 +3275,9 @@ static integer c_n1 = -1; i__7 = *nmax << 1; zmmch_("N", "N", &lj, &c__1, &i__6, & c_b2, &ab[jj], nmax, &w[1], & - i__7, &beta, &c__[jj + j * - c_dim1], nmax, &ct[1], &g[1], - &cc[jc], &ldc, eps, &err, + i__7, &beta, &c__[jj + j * + c_dim1], nmax, &ct[1], &g[1], + &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } @@ -3380,7 +3390,7 @@ static integer c_n1 = -1; } /* zchk5_ */ -/* Subroutine */ int zchke_(integer *isnum, char *srnamt, integer *nout, +/* Subroutine */ int zchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ @@ -3393,37 +3403,37 @@ static integer c_n1 = -1; integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ - doublecomplex a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] + doublecomplex a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; doublereal rbeta; - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, ftnlen, ftnlen), zhemm_(char *, char *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, ftnlen, ftnlen), zherk_(char *, char *, integer *, + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen), zhemm_(char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen), zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, doublereal *, - doublecomplex *, integer *, ftnlen, ftnlen), ztrmm_(char *, char - *, char *, char *, integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, - ftnlen, ftnlen, ftnlen), zsymm_(char *, char *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), ztrmm_(char *, char + *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, + ftnlen, ftnlen, ftnlen), zsymm_(char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer * - , doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), - zsyrk_(char *, char *, integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, ftnlen, ftnlen), zher2k_(char *, char *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublereal *, doublecomplex *, - integer *, ftnlen, ftnlen), zsyr2k_(char *, char *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, + , doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), + zsyrk_(char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen), zher2k_(char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *, ftnlen, ftnlen), zsyr2k_(char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal ralpha; - extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical + extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ @@ -3485,302 +3495,302 @@ static integer c_n1 = -1; } L10: infoc_1.infot = 1; - zgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; - zgemm_("/", "C", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("/", "C", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; - zgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - zgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - zgemm_("C", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; - zgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("N", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("C", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("C", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("C", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("T", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; - zgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("N", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("C", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("C", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("C", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("T", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; - zgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("N", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("C", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("C", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("C", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("T", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; - zgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, + zgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("C", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("C", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, + zgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("T", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; - zgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, + zgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, + zgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("N", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("C", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("T", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("C", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; - zgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + zgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + zgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, + zgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("C", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("C", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("C", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("C", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("T", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; - zgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, + zgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); @@ -4960,9 +4970,9 @@ static integer c_n1 = -1; } /* zchke_ */ -/* Subroutine */ int zmake_(char *type__, char *uplo, char *diag, integer *m, - integer *n, doublecomplex *a, integer *nmax, doublecomplex *aa, - integer *lda, logical *reset, doublecomplex *transl, ftnlen type_len, +/* Subroutine */ int zmake_(char *type__, char *uplo, char *diag, integer *m, + integer *n, doublecomplex *a, integer *nmax, doublecomplex *aa, + integer *lda, logical *reset, doublecomplex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ @@ -5148,10 +5158,10 @@ static integer c_n1 = -1; } /* zmake_ */ /* Subroutine */ int zmmch_(char *transa, char *transb, integer *m, integer * - n, integer *kk, doublecomplex *alpha, doublecomplex *a, integer *lda, + n, integer *kk, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * c__, integer *ldc, doublecomplex *ct, doublereal *g, doublecomplex * - cc, integer *ldcc, doublereal *eps, doublereal *err, logical *fatal, + cc, integer *ldcc, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ @@ -5165,7 +5175,7 @@ static integer c_n1 = -1; " \002,i3)"; /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublereal d__1, d__2, d__3, d__4, d__5, d__6; doublecomplex z__1, z__2, z__3, z__4; @@ -5224,9 +5234,9 @@ static integer c_n1 = -1; cc -= cc_offset; /* Function Body */ - trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == + trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; - tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == + tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; ctrana = *(unsigned char *)transa == 'C'; ctranb = *(unsigned char *)transb == 'C'; @@ -5254,17 +5264,17 @@ static integer c_n1 = -1; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = k + j * b_dim1; - z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, + z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[ i__7].r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag( &a[i__ + k * a_dim1]), abs(d__2))) * ((d__3 = b[ - i__5].r, abs(d__3)) + (d__4 = d_imag(&b[k + j * + i__5].r, abs(d__3)) + (d__4 = d_imag(&b[k + j * b_dim1]), abs(d__4))); /* L20: */ } @@ -5280,15 +5290,15 @@ static integer c_n1 = -1; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); i__6 = k + j * b_dim1; - z__2.r = z__3.r * b[i__6].r - z__3.i * b[i__6].i, + z__2.r = z__3.r * b[i__6].r - z__3.i * b[i__6].i, z__2.i = z__3.r * b[i__6].i + z__3.i * b[i__6] .r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; - g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = + g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[k + j * b_dim1]), abs(d__4))); @@ -5308,12 +5318,12 @@ static integer c_n1 = -1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, z__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; - g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = + g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[k + j * b_dim1]), abs(d__4))); @@ -5332,15 +5342,15 @@ static integer c_n1 = -1; i__5 = i__; i__6 = i__ + k * a_dim1; d_cnjg(&z__3, &b[j + k * b_dim1]); - z__2.r = a[i__6].r * z__3.r - a[i__6].i * z__3.i, - z__2.i = a[i__6].r * z__3.i + a[i__6].i * + z__2.r = a[i__6].r * z__3.r - a[i__6].i * z__3.i, + z__2.i = a[i__6].r * z__3.i + a[i__6].i * z__3.r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; - g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = + g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[j + k * b_dim1]), abs(d__4))); @@ -5360,12 +5370,12 @@ static integer c_n1 = -1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, z__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; - g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = + g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[j + k * b_dim1]), abs(d__4))); @@ -5385,17 +5395,17 @@ static integer c_n1 = -1; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); d_cnjg(&z__4, &b[j + k * b_dim1]); - z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, - z__2.i = z__3.r * z__4.i + z__3.i * + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, + z__2.i = z__3.r * z__4.i + z__3.i * z__4.r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) - * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 + * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L120: */ } @@ -5410,17 +5420,17 @@ static integer c_n1 = -1; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); i__6 = j + k * b_dim1; - z__2.r = z__3.r * b[i__6].r - z__3.i * b[i__6].i, + z__2.r = z__3.r * b[i__6].r - z__3.i * b[i__6].i, z__2.i = z__3.r * b[i__6].i + z__3.i * b[ i__6].r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) - * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 + * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L140: */ } @@ -5437,17 +5447,17 @@ static integer c_n1 = -1; i__5 = i__; i__6 = k + i__ * a_dim1; d_cnjg(&z__3, &b[j + k * b_dim1]); - z__2.r = a[i__6].r * z__3.r - a[i__6].i * z__3.i, - z__2.i = a[i__6].r * z__3.i + a[i__6].i * + z__2.r = a[i__6].r * z__3.r - a[i__6].i * z__3.i, + z__2.i = a[i__6].r * z__3.i + a[i__6].i * z__3.r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) - * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 + * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L160: */ } @@ -5463,16 +5473,16 @@ static integer c_n1 = -1; i__6 = k + i__ * a_dim1; i__7 = j + k * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[ - i__7].i, z__2.i = a[i__6].r * b[i__7].i + + i__7].i, z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; - z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) - * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 + * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L180: */ } @@ -5485,17 +5495,17 @@ static integer c_n1 = -1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; - z__2.r = alpha->r * ct[i__4].r - alpha->i * ct[i__4].i, z__2.i = + z__2.r = alpha->r * ct[i__4].r - alpha->i * ct[i__4].i, z__2.i = alpha->r * ct[i__4].i + alpha->i * ct[i__4].r; i__5 = i__ + j * c_dim1; - z__3.r = beta->r * c__[i__5].r - beta->i * c__[i__5].i, z__3.i = + z__3.r = beta->r * c__[i__5].r - beta->i * c__[i__5].i, z__3.i = beta->r * c__[i__5].i + beta->i * c__[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ct[i__3].r = z__1.r, ct[i__3].i = z__1.i; i__3 = i__ + j * c_dim1; - g[i__] = ((d__1 = alpha->r, abs(d__1)) + (d__2 = d_imag(alpha), + g[i__] = ((d__1 = alpha->r, abs(d__1)) + (d__2 = d_imag(alpha), abs(d__2))) * g[i__] + ((d__3 = beta->r, abs(d__3)) + ( - d__4 = d_imag(beta), abs(d__4))) * ((d__5 = c__[i__3].r, + d__4 = d_imag(beta), abs(d__4))) * ((d__5 = c__[i__3].r, abs(d__5)) + (d__6 = d_imag(&c__[i__ + j * c_dim1]), abs( d__6))); /* L200: */ @@ -5621,8 +5631,8 @@ logical lze_(doublecomplex *ri, doublecomplex *rj, integer *lr) } /* lze_ */ -logical lzeres_(char *type__, char *uplo, integer *m, integer *n, - doublecomplex *aa, doublecomplex *as, integer *lda, ftnlen type_len, +logical lzeres_(char *type__, char *uplo, integer *m, integer *n, + doublecomplex *aa, doublecomplex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ @@ -5807,7 +5817,7 @@ doublereal ddiff_(doublereal *x, doublereal *y) } /* ddiff_ */ -/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, +/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ diff --git a/common.mk b/common.mk index 05fd1ad73d..b221fa47ee 100644 --- a/common.mk +++ b/common.mk @@ -881,13 +881,8 @@ endif # Threading flags for HPX ifneq ($(findstring hpx,$(THREADING_MODEL)),) -ifneq ($(findstring yes,$(ENABLE_DEBUG)),) -HPX_CXXFLAGS := $(shell pkg-config --cflags hpx_component_debug) -HPX_LDFLAGS := $(filter-out -shared,$(shell pkg-config --libs hpx_component_debug)) -else HPX_CXXFLAGS := $(shell pkg-config --cflags hpx_component) HPX_LDFLAGS := $(filter-out -shared,$(shell pkg-config --libs hpx_component)) -endif CTHREADFLAGS += $(filter-out -std=%,$(HPX_CXXFLAGS)) LDFLAGS += $(HPX_LDFLAGS) ifeq ($(OS_NAME),Darwin) diff --git a/configure b/configure index 133d0bbe7b..4a43884cab 100755 --- a/configure +++ b/configure @@ -3671,7 +3671,7 @@ main() # (that is, absent any explicit choice at runtime). if [ "x${enable_openmp}" = "xyes" ] || [ "x${enable_pthreads}" = "xyes" ] || - [ "x${enable_hpx}" = "xyes" ]; then + [ "x${enable_hpx}" = "xyes" ]; then if [ "x${first_tm}" = "xsingle" ]; then echo "${script_name}: threading will default to single-threaded." diff --git a/frame/thread/bli_thrcomm.c b/frame/thread/bli_thrcomm.c index 3b47977885..8032b84a9d 100644 --- a/frame/thread/bli_thrcomm.c +++ b/frame/thread/bli_thrcomm.c @@ -106,7 +106,7 @@ static thrcomm_cleanup_ft cleanup_fpa[ BLIS_NUM_THREAD_IMPLS ] = NULL, #endif [BLIS_HPX] = -#if defined(BLIS_ENABLE_PTHREADS) +#if defined(BLIS_ENABLE_HPX) bli_thrcomm_cleanup_hpx, #else NULL, @@ -128,7 +128,7 @@ static thrcomm_barrier_ft barrier_fpa[ BLIS_NUM_THREAD_IMPLS ] = bli_thrcomm_barrier_single, #endif [BLIS_HPX] = -#if defined(BLIS_ENABLE_PTHREADS) +#if defined(BLIS_ENABLE_HPX) bli_thrcomm_barrier_hpx, #else bli_thrcomm_barrier_single, diff --git a/frame/thread/bli_thread_hpx.cpp b/frame/thread/bli_thread_hpx.cpp index 226383bd2c..38c92481d3 100644 --- a/frame/thread/bli_thread_hpx.cpp +++ b/frame/thread/bli_thread_hpx.cpp @@ -38,8 +38,11 @@ #include #include +#include extern "C" +{ + void bli_thread_launch_hpx ( dim_t n_threads, @@ -66,4 +69,17 @@ void bli_thread_launch_hpx bli_thrcomm_free( gl_comm_pool, gl_comm ); } +void bli_thread_initialize_hpx( int argc, char** argv ) +{ + hpx::start( nullptr, argc, argv ); +} + +int bli_thread_finalize_hpx() +{ + hpx::apply([]() { hpx::finalize(); }); + return hpx::stop(); +} + +} // extern "C" + #endif diff --git a/frame/thread/bli_thread_hpx.h b/frame/thread/bli_thread_hpx.h index 95d5c1fad9..55d2758a9d 100644 --- a/frame/thread/bli_thread_hpx.h +++ b/frame/thread/bli_thread_hpx.h @@ -45,6 +45,10 @@ void bli_thread_launch_hpx const void* params ); +void bli_thread_initialize_hpx( int argc, char** argv ); + +int bli_thread_finalize_hpx(); + #endif #endif diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index 76c219c312..7ca314c5f3 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -66,6 +66,10 @@ int main( int argc, char** argv ) test_params_t params; test_ops_t ops; +#ifdef BLIS_ENABLE_HPX + bli_thread_initialize_hpx( 1, argv ); +#endif + // Initialize libblis. //bli_init(); @@ -88,8 +92,12 @@ int main( int argc, char** argv ) // Finalize libblis. bli_finalize(); +#ifdef BLIS_ENABLE_HPX + return bli_thread_finalize_hpx(); +#else // Return peacefully. return 0; +#endif } From ba96f67e917d136e71ae29f1b920f8c368f65c34 Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Thu, 10 Nov 2022 16:39:38 -0500 Subject: [PATCH 20/21] build fixes for c++ --- Makefile | 2 -- build/bli_config.h.in | 2 -- build/libblis-symbols.def | 2 ++ 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 6dd48f9090..4888145dc6 100644 --- a/Makefile +++ b/Makefile @@ -554,14 +554,12 @@ else endif $(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.cpp $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) -ifneq ($(findstring hpx,$(THREADING_MODEL)),) ifeq ($(ENABLE_VERBOSE),yes) $(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-addon-cxxtext-for,$(1)) @$(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ endif -endif endef # first argument: a kernel set (name) being targeted (e.g. haswell). diff --git a/build/bli_config.h.in b/build/bli_config.h.in index 716b6e22fc..41e76d2144 100644 --- a/build/bli_config.h.in +++ b/build/bli_config.h.in @@ -72,8 +72,6 @@ #endif #endif - - #if @enable_jrir_slab@ #define BLIS_ENABLE_JRIR_SLAB #endif diff --git a/build/libblis-symbols.def b/build/libblis-symbols.def index db20ffbca4..4bc91784c9 100644 --- a/build/libblis-symbols.def +++ b/build/libblis-symbols.def @@ -557,6 +557,8 @@ bli_info_get_enable_openmp_as_default bli_info_get_enable_pba_pools bli_info_get_enable_pthreads bli_info_get_enable_pthreads_as_default +bli_info_get_enable_hpx +bli_info_get_enable_hpx_as_default bli_info_get_enable_sandbox bli_info_get_enable_sba_pools bli_info_get_enable_threading From 0ef4b649e13f6834389cc49a4bfb6b1f7bf3b5fb Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sun, 13 Nov 2022 16:03:20 -0600 Subject: [PATCH 21/21] Don't build cpp files at all when HPX threading not enabled. --- Makefile | 8 +++++--- common.mk | 12 ++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 4888145dc6..33641f8c85 100644 --- a/Makefile +++ b/Makefile @@ -553,12 +553,14 @@ else @$(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@ endif +ifneq ($(findstring hpx,$(THREADING_MODEL)),) $(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.cpp $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) - $(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ + $(CXX) $(call get-frame-cxxflags-for,$(1)) -c $$< -o $$@ else - @echo "Compiling $$@" $(call get-addon-cxxtext-for,$(1)) - @$(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ + @echo "Compiling $$@" $(call get-frame-cxxtext-for,$(1)) + @$(CXX) $(call get-frame-cxxflags-for,$(1)) -c $$< -o $$@ +endif endif endef diff --git a/common.mk b/common.mk index b221fa47ee..119d09e872 100644 --- a/common.mk +++ b/common.mk @@ -152,6 +152,13 @@ get-frame-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(BUILD_SYMFLAGS) \ ) +get-frame-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + get-kernel-cflags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ $(call load-var-for,CKVECFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ @@ -225,6 +232,7 @@ get-refinit-text-for = "('$(1)' CFLAGS for ref. kernel init)" get-refkern-text-for = "('$(1)' CFLAGS for ref. kernels)" get-config-text-for = "('$(1)' CFLAGS for config code)" get-frame-text-for = "('$(1)' CFLAGS for framework code)" +get-frame-cxxtext-for = "('$(1)' CXXFLAGS for framework code)" get-kernel-text-for = "('$(1)' CFLAGS for kernels)" get-addon-c99text-for = "('$(1)' CFLAGS for addons)" get-addon-cxxtext-for = "('$(1)' CXXFLAGS for addons)" @@ -349,7 +357,11 @@ REFNM := ref # Source suffixes. CONFIG_SRC_SUFS := c KERNELS_SRC_SUFS := c s S +ifneq ($(findstring hpx,$(THREADING_MODEL)),) FRAME_SRC_SUFS := c cpp +else +FRAME_SRC_SUFS := c +endif ADDON_C99_SUFS := c ADDON_CXX_SUFS := cc cpp cxx